From beb6e95903ca73f8f01fd6c9cdd24d2df302dd12 Mon Sep 17 00:00:00 2001 From: bill-auger Date: Thu, 23 May 2019 13:59:22 -0400 Subject: [iceweasel]: upgrade to v67.0 (broken) --- libre/iceweasel/0001-bz-1468911.patch | 26961 ------------------------------- libre/iceweasel/0001-bz-1521249.patch | 27006 ++++++++++++++++++++++++++++++++ libre/iceweasel/PKGBUILD | 20 +- 3 files changed, 27016 insertions(+), 26971 deletions(-) delete mode 100644 libre/iceweasel/0001-bz-1468911.patch create mode 100644 libre/iceweasel/0001-bz-1521249.patch (limited to 'libre') diff --git a/libre/iceweasel/0001-bz-1468911.patch b/libre/iceweasel/0001-bz-1468911.patch deleted file mode 100644 index 6e5293f42..000000000 --- a/libre/iceweasel/0001-bz-1468911.patch +++ /dev/null @@ -1,26961 +0,0 @@ -From b68f15b7a57f7df52d955f7f75f9df3b78041764 Mon Sep 17 00:00:00 2001 -Message-Id: -From: "Jan Alexander Steffens (heftig)" -Date: Tue, 19 Mar 2019 20:45:22 +0100 -Subject: [PATCH] bz 1468911 - -https://bugzilla.mozilla.org/show_bug.cgi?id=1521249 ---- - .cargo/config.in | 5 + - Cargo.lock | 53 +- - Cargo.toml | 1 + - third_party/rust/cfg-if/.cargo-checksum.json | 2 +- - third_party/rust/cfg-if/Cargo.toml | 28 +- - third_party/rust/cfg-if/README.md | 18 +- - third_party/rust/cfg-if/src/lib.rs | 53 +- - .../rust/encoding_rs/.cargo-checksum.json | 2 +- - third_party/rust/encoding_rs/Cargo.toml | 12 +- - third_party/rust/encoding_rs/README.md | 52 +- - third_party/rust/encoding_rs/build.rs | 8 + - third_party/rust/encoding_rs/src/handles.rs | 2 +- - third_party/rust/encoding_rs/src/lib.rs | 7 +- - third_party/rust/encoding_rs/src/mem.rs | 24 +- - .../rust/encoding_rs/src/simd_funcs.rs | 93 +- - .../rust/encoding_rs/src/x_user_defined.rs | 7 +- - third_party/rust/packed_simd/.appveyor.yml | 59 + - .../rust/packed_simd/.cargo-checksum.json | 1 + - third_party/rust/packed_simd/.travis.yml | 308 ++++ - third_party/rust/packed_simd/Cargo.toml | 42 + - .../rust/{simd => packed_simd}/LICENSE-APACHE | 0 - 
.../rust/{simd => packed_simd}/LICENSE-MIT | 4 +- - third_party/rust/packed_simd/bors.toml | 3 + - third_party/rust/packed_simd/build.rs | 8 + - third_party/rust/packed_simd/ci/all.sh | 71 + - .../packed_simd/ci/android-install-ndk.sh | 37 + - .../packed_simd/ci/android-install-sdk.sh | 60 + - .../rust/packed_simd/ci/android-sysimage.sh | 56 + - third_party/rust/packed_simd/ci/benchmark.sh | 32 + - .../ci/deploy_and_run_on_ios_simulator.rs | 176 +++ - .../docker/aarch64-linux-android/Dockerfile | 47 + - .../aarch64-unknown-linux-gnu/Dockerfile | 14 + - .../docker/arm-linux-androideabi/Dockerfile | 47 + - .../arm-unknown-linux-gnueabi/Dockerfile | 15 + - .../arm-unknown-linux-gnueabihf/Dockerfile | 13 + - .../armv7-unknown-linux-gnueabihf/Dockerfile | 13 + - .../docker/i586-unknown-linux-gnu/Dockerfile | 7 + - .../docker/i686-unknown-linux-gnu/Dockerfile | 7 + - .../docker/mips-unknown-linux-gnu/Dockerfile | 13 + - .../mips64-unknown-linux-gnuabi64/Dockerfile | 10 + - .../Dockerfile | 10 + - .../mipsel-unknown-linux-musl/Dockerfile | 25 + - .../powerpc-unknown-linux-gnu/Dockerfile | 12 + - .../powerpc64-unknown-linux-gnu/Dockerfile | 17 + - .../powerpc64le-unknown-linux-gnu/Dockerfile | 11 + - .../docker/s390x-unknown-linux-gnu/Dockerfile | 20 + - .../sparc64-unknown-linux-gnu/Dockerfile | 18 + - .../thumbv7neon-linux-androideabi/Dockerfile | 47 + - .../Dockerfile | 13 + - .../docker/wasm32-unknown-unknown/Dockerfile | 37 + - .../ci/docker/x86_64-linux-android/Dockerfile | 29 + - .../Dockerfile | 16 + - .../x86_64-unknown-linux-gnu/Dockerfile | 10 + - third_party/rust/packed_simd/ci/dox.sh | 24 + - .../rust/packed_simd/ci/linux-s390x.sh | 18 + - .../rust/packed_simd/ci/linux-sparc64.sh | 17 + - third_party/rust/packed_simd/ci/lld-shim.rs | 11 + - .../rust/packed_simd/ci/max_line_width.sh | 17 + - third_party/rust/packed_simd/ci/run-docker.sh | 38 + - third_party/rust/packed_simd/ci/run.sh | 96 ++ - .../rust/packed_simd/ci/run_examples.sh | 51 + - 
.../rust/packed_simd/ci/runtest-android.rs | 45 + - .../rust/packed_simd/ci/setup_benchmarks.sh | 10 + - .../rust/packed_simd/ci/test-runner-linux | 24 + - third_party/rust/packed_simd/contributing.md | 67 + - .../rust/packed_simd/perf-guide/.gitignore | 1 + - .../rust/packed_simd/perf-guide/book.toml | 12 + - .../packed_simd/perf-guide/src/SUMMARY.md | 21 + - .../rust/packed_simd/perf-guide/src/ascii.css | 4 + - .../perf-guide/src/bound_checks.md | 22 + - .../perf-guide/src/float-math/approx.md | 8 + - .../perf-guide/src/float-math/fma.md | 6 + - .../perf-guide/src/float-math/fp.md | 3 + - .../perf-guide/src/float-math/svml.md | 7 + - .../perf-guide/src/introduction.md | 26 + - .../packed_simd/perf-guide/src/prof/linux.md | 107 ++ - .../packed_simd/perf-guide/src/prof/mca.md | 100 ++ - .../perf-guide/src/prof/profiling.md | 14 + - .../src/target-feature/attribute.md | 5 + - .../perf-guide/src/target-feature/features.md | 13 + - .../perf-guide/src/target-feature/inlining.md | 5 + - .../perf-guide/src/target-feature/practice.md | 31 + - .../perf-guide/src/target-feature/runtime.md | 5 + - .../src/target-feature/rustflags.md | 77 + - .../perf-guide/src/vert-hor-ops.md | 76 + - third_party/rust/packed_simd/readme.md | 182 +++ - third_party/rust/packed_simd/rustfmt.toml | 7 + - third_party/rust/packed_simd/src/api.rs | 301 ++++ - .../rust/packed_simd/src/api/bit_manip.rs | 128 ++ - third_party/rust/packed_simd/src/api/cast.rs | 108 ++ - .../rust/packed_simd/src/api/cast/macros.rs | 82 + - .../rust/packed_simd/src/api/cast/v128.rs | 79 + - .../rust/packed_simd/src/api/cast/v16.rs | 17 + - .../rust/packed_simd/src/api/cast/v256.rs | 81 + - .../rust/packed_simd/src/api/cast/v32.rs | 30 + - .../rust/packed_simd/src/api/cast/v512.rs | 68 + - .../rust/packed_simd/src/api/cast/v64.rs | 47 + - third_party/rust/packed_simd/src/api/cmp.rs | 16 + - .../rust/packed_simd/src/api/cmp/eq.rs | 27 + - .../rust/packed_simd/src/api/cmp/ord.rs | 43 + - 
.../packed_simd/src/api/cmp/partial_eq.rs | 67 + - .../packed_simd/src/api/cmp/partial_ord.rs | 234 +++ - .../rust/packed_simd/src/api/cmp/vertical.rs | 114 ++ - .../rust/packed_simd/src/api/default.rs | 28 + - third_party/rust/packed_simd/src/api/fmt.rs | 12 + - .../rust/packed_simd/src/api/fmt/binary.rs | 56 + - .../rust/packed_simd/src/api/fmt/debug.rs | 62 + - .../rust/packed_simd/src/api/fmt/lower_hex.rs | 56 + - .../rust/packed_simd/src/api/fmt/octal.rs | 56 + - .../rust/packed_simd/src/api/fmt/upper_hex.rs | 56 + - third_party/rust/packed_simd/src/api/from.rs | 7 + - .../packed_simd/src/api/from/from_array.rs | 121 ++ - .../packed_simd/src/api/from/from_vector.rs | 67 + - third_party/rust/packed_simd/src/api/hash.rs | 47 + - .../rust/packed_simd/src/api/into_bits.rs | 59 + - .../src/api/into_bits/arch_specific.rs | 190 +++ - .../packed_simd/src/api/into_bits/macros.rs | 74 + - .../packed_simd/src/api/into_bits/v128.rs | 28 + - .../rust/packed_simd/src/api/into_bits/v16.rs | 9 + - .../packed_simd/src/api/into_bits/v256.rs | 27 + - .../rust/packed_simd/src/api/into_bits/v32.rs | 13 + - .../packed_simd/src/api/into_bits/v512.rs | 27 + - .../rust/packed_simd/src/api/into_bits/v64.rs | 18 + - third_party/rust/packed_simd/src/api/math.rs | 4 + - .../rust/packed_simd/src/api/math/float.rs | 61 + - .../packed_simd/src/api/math/float/abs.rs | 31 + - .../packed_simd/src/api/math/float/consts.rs | 86 + - .../packed_simd/src/api/math/float/cos.rs | 44 + - .../packed_simd/src/api/math/float/exp.rs | 33 + - .../rust/packed_simd/src/api/math/float/ln.rs | 33 + - .../packed_simd/src/api/math/float/mul_add.rs | 44 + - .../src/api/math/float/mul_adde.rs | 48 + - .../packed_simd/src/api/math/float/powf.rs | 36 + - .../packed_simd/src/api/math/float/recpre.rs | 36 + - .../packed_simd/src/api/math/float/rsqrte.rs | 40 + - .../packed_simd/src/api/math/float/sin.rs | 50 + - .../packed_simd/src/api/math/float/sqrt.rs | 35 + - .../packed_simd/src/api/math/float/sqrte.rs | 44 + - 
.../rust/packed_simd/src/api/minimal.rs | 6 + - .../rust/packed_simd/src/api/minimal/iuf.rs | 167 ++ - .../rust/packed_simd/src/api/minimal/mask.rs | 174 +++ - .../rust/packed_simd/src/api/minimal/ptr.rs | 1385 +++++++++++++++++ - third_party/rust/packed_simd/src/api/ops.rs | 32 + - .../src/api/ops/scalar_arithmetic.rs | 203 +++ - .../packed_simd/src/api/ops/scalar_bitwise.rs | 162 ++ - .../src/api/ops/scalar_mask_bitwise.rs | 140 ++ - .../packed_simd/src/api/ops/scalar_shifts.rs | 107 ++ - .../src/api/ops/vector_arithmetic.rs | 148 ++ - .../packed_simd/src/api/ops/vector_bitwise.rs | 129 ++ - .../src/api/ops/vector_float_min_max.rs | 69 + - .../src/api/ops/vector_int_min_max.rs | 57 + - .../src/api/ops/vector_mask_bitwise.rs | 116 ++ - .../packed_simd/src/api/ops/vector_neg.rs | 43 + - .../packed_simd/src/api/ops/vector_rotates.rs | 90 ++ - .../packed_simd/src/api/ops/vector_shifts.rs | 107 ++ - third_party/rust/packed_simd/src/api/ptr.rs | 4 + - .../packed_simd/src/api/ptr/gather_scatter.rs | 241 +++ - .../rust/packed_simd/src/api/reductions.rs | 12 + - .../packed_simd/src/api/reductions/bitwise.rs | 151 ++ - .../src/api/reductions/float_arithmetic.rs | 312 ++++ - .../src/api/reductions/integer_arithmetic.rs | 197 +++ - .../packed_simd/src/api/reductions/mask.rs | 89 ++ - .../packed_simd/src/api/reductions/min_max.rs | 377 +++++ - .../rust/packed_simd/src/api/select.rs | 75 + - .../rust/packed_simd/src/api/shuffle.rs | 190 +++ - .../rust/packed_simd/src/api/shuffle1_dyn.rs | 159 ++ - third_party/rust/packed_simd/src/api/slice.rs | 7 + - .../packed_simd/src/api/slice/from_slice.rs | 216 +++ - .../src/api/slice/write_to_slice.rs | 211 +++ - .../rust/packed_simd/src/api/swap_bytes.rs | 192 +++ - third_party/rust/packed_simd/src/codegen.rs | 59 + - .../rust/packed_simd/src/codegen/bit_manip.rs | 354 +++++ - .../rust/packed_simd/src/codegen/llvm.rs | 99 ++ - .../rust/packed_simd/src/codegen/math.rs | 3 + - .../packed_simd/src/codegen/math/float.rs | 18 + - 
.../packed_simd/src/codegen/math/float/abs.rs | 103 ++ - .../packed_simd/src/codegen/math/float/cos.rs | 103 ++ - .../src/codegen/math/float/cos_pi.rs | 87 ++ - .../packed_simd/src/codegen/math/float/exp.rs | 112 ++ - .../packed_simd/src/codegen/math/float/ln.rs | 112 ++ - .../src/codegen/math/float/macros.rs | 559 +++++++ - .../src/codegen/math/float/mul_add.rs | 109 ++ - .../src/codegen/math/float/mul_adde.rs | 66 + - .../src/codegen/math/float/powf.rs | 112 ++ - .../packed_simd/src/codegen/math/float/sin.rs | 103 ++ - .../src/codegen/math/float/sin_cos_pi.rs | 195 +++ - .../src/codegen/math/float/sin_pi.rs | 87 ++ - .../src/codegen/math/float/sqrt.rs | 103 ++ - .../src/codegen/math/float/sqrte.rs | 67 + - .../src/codegen/pointer_sized_int.rs | 28 + - .../packed_simd/src/codegen/reductions.rs | 1 + - .../src/codegen/reductions/mask.rs | 69 + - .../src/codegen/reductions/mask/aarch64.rs | 71 + - .../src/codegen/reductions/mask/arm.rs | 54 + - .../src/codegen/reductions/mask/fallback.rs | 6 + - .../codegen/reductions/mask/fallback_impl.rs | 237 +++ - .../src/codegen/reductions/mask/x86.rs | 194 +++ - .../src/codegen/reductions/mask/x86/avx.rs | 101 ++ - .../src/codegen/reductions/mask/x86/avx2.rs | 35 + - .../src/codegen/reductions/mask/x86/sse.rs | 68 + - .../src/codegen/reductions/mask/x86/sse2.rs | 70 + - .../rust/packed_simd/src/codegen/shuffle.rs | 302 ++++ - .../packed_simd/src/codegen/shuffle1_dyn.rs | 432 +++++ - .../packed_simd/src/codegen/swap_bytes.rs | 189 +++ - .../rust/packed_simd/src/codegen/v128.rs | 46 + - .../rust/packed_simd/src/codegen/v16.rs | 7 + - .../rust/packed_simd/src/codegen/v256.rs | 78 + - .../rust/packed_simd/src/codegen/v32.rs | 11 + - .../rust/packed_simd/src/codegen/v512.rs | 145 ++ - .../rust/packed_simd/src/codegen/v64.rs | 21 + - .../rust/packed_simd/src/codegen/vPtr.rs | 33 + - .../rust/packed_simd/src/codegen/vSize.rs | 43 + - third_party/rust/packed_simd/src/lib.rs | 327 ++++ - third_party/rust/packed_simd/src/masks.rs | 128 
++ - third_party/rust/packed_simd/src/sealed.rs | 41 + - third_party/rust/packed_simd/src/testing.rs | 8 + - .../rust/packed_simd/src/testing/macros.rs | 44 + - .../rust/packed_simd/src/testing/utils.rs | 135 ++ - third_party/rust/packed_simd/src/v128.rs | 80 + - third_party/rust/packed_simd/src/v16.rs | 16 + - third_party/rust/packed_simd/src/v256.rs | 86 + - third_party/rust/packed_simd/src/v32.rs | 29 + - third_party/rust/packed_simd/src/v512.rs | 99 ++ - third_party/rust/packed_simd/src/v64.rs | 66 + - third_party/rust/packed_simd/src/vPtr.rs | 34 + - third_party/rust/packed_simd/src/vSize.rs | 53 + - .../rust/packed_simd/tests/endianness.rs | 262 ++++ - third_party/rust/simd/.cargo-checksum.json | 1 - - third_party/rust/simd/Cargo.toml | 37 - - third_party/rust/simd/README.md | 11 - - third_party/rust/simd/benches/mandelbrot.rs | 117 -- - third_party/rust/simd/benches/matrix.rs | 485 ------ - third_party/rust/simd/build.rs | 3 - - third_party/rust/simd/examples/axpy.rs | 65 - - third_party/rust/simd/examples/convert.rs | 38 - - third_party/rust/simd/examples/dot-product.rs | 60 - - .../simd/examples/fannkuch-redux-nosimd.rs | 156 -- - .../rust/simd/examples/fannkuch-redux.rs | 233 --- - third_party/rust/simd/examples/mandelbrot.rs | 125 -- - .../rust/simd/examples/matrix-inverse.rs | 281 ---- - .../rust/simd/examples/nbody-nosimd.rs | 156 -- - third_party/rust/simd/examples/nbody.rs | 170 -- - third_party/rust/simd/examples/ops.rs | 10 - - .../simd/examples/spectral-norm-nosimd.rs | 106 -- - .../rust/simd/examples/spectral-norm.rs | 74 - - third_party/rust/simd/src/aarch64/mod.rs | 3 - - third_party/rust/simd/src/aarch64/neon.rs | 681 -------- - third_party/rust/simd/src/arm/mod.rs | 4 - - third_party/rust/simd/src/arm/neon.rs | 622 -------- - third_party/rust/simd/src/common.rs | 520 ------- - third_party/rust/simd/src/lib.rs | 804 ---------- - third_party/rust/simd/src/sixty_four.rs | 228 --- - third_party/rust/simd/src/v256.rs | 436 ------ - 
third_party/rust/simd/src/x86/avx.rs | 290 ---- - third_party/rust/simd/src/x86/avx2.rs | 65 - - third_party/rust/simd/src/x86/mod.rs | 16 - - third_party/rust/simd/src/x86/sse2.rs | 359 ----- - third_party/rust/simd/src/x86/sse3.rs | 57 - - third_party/rust/simd/src/x86/sse4_1.rs | 155 -- - third_party/rust/simd/src/x86/sse4_2.rs | 19 - - third_party/rust/simd/src/x86/ssse3.rs | 172 -- - toolkit/moz.configure | 11 +- - 262 files changed, 17410 insertions(+), 6733 deletions(-) - create mode 100644 third_party/rust/packed_simd/.appveyor.yml - create mode 100644 third_party/rust/packed_simd/.cargo-checksum.json - create mode 100644 third_party/rust/packed_simd/.travis.yml - create mode 100644 third_party/rust/packed_simd/Cargo.toml - rename third_party/rust/{simd => packed_simd}/LICENSE-APACHE (100%) - rename third_party/rust/{simd => packed_simd}/LICENSE-MIT (93%) - create mode 100644 third_party/rust/packed_simd/bors.toml - create mode 100644 third_party/rust/packed_simd/build.rs - create mode 100644 third_party/rust/packed_simd/ci/all.sh - create mode 100644 third_party/rust/packed_simd/ci/android-install-ndk.sh - create mode 100644 third_party/rust/packed_simd/ci/android-install-sdk.sh - create mode 100644 third_party/rust/packed_simd/ci/android-sysimage.sh - create mode 100644 third_party/rust/packed_simd/ci/benchmark.sh - create mode 100644 third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs - create mode 100644 third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile - create mode 100644 
third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile - create mode 100644 third_party/rust/packed_simd/ci/dox.sh - create mode 100644 third_party/rust/packed_simd/ci/linux-s390x.sh - create mode 100644 third_party/rust/packed_simd/ci/linux-sparc64.sh - create mode 100644 
third_party/rust/packed_simd/ci/lld-shim.rs - create mode 100644 third_party/rust/packed_simd/ci/max_line_width.sh - create mode 100644 third_party/rust/packed_simd/ci/run-docker.sh - create mode 100644 third_party/rust/packed_simd/ci/run.sh - create mode 100644 third_party/rust/packed_simd/ci/run_examples.sh - create mode 100644 third_party/rust/packed_simd/ci/runtest-android.rs - create mode 100644 third_party/rust/packed_simd/ci/setup_benchmarks.sh - create mode 100644 third_party/rust/packed_simd/ci/test-runner-linux - create mode 100644 third_party/rust/packed_simd/contributing.md - create mode 100644 third_party/rust/packed_simd/perf-guide/.gitignore - create mode 100644 third_party/rust/packed_simd/perf-guide/book.toml - create mode 100644 third_party/rust/packed_simd/perf-guide/src/SUMMARY.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/ascii.css - create mode 100644 third_party/rust/packed_simd/perf-guide/src/bound_checks.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/approx.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/fma.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/fp.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/svml.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/introduction.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/linux.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/mca.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/profiling.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/features.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md - 
create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md - create mode 100644 third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md - create mode 100644 third_party/rust/packed_simd/readme.md - create mode 100644 third_party/rust/packed_simd/rustfmt.toml - create mode 100644 third_party/rust/packed_simd/src/api.rs - create mode 100644 third_party/rust/packed_simd/src/api/bit_manip.rs - create mode 100644 third_party/rust/packed_simd/src/api/cast.rs - create mode 100644 third_party/rust/packed_simd/src/api/cast/macros.rs - create mode 100644 third_party/rust/packed_simd/src/api/cast/v128.rs - create mode 100644 third_party/rust/packed_simd/src/api/cast/v16.rs - create mode 100644 third_party/rust/packed_simd/src/api/cast/v256.rs - create mode 100644 third_party/rust/packed_simd/src/api/cast/v32.rs - create mode 100644 third_party/rust/packed_simd/src/api/cast/v512.rs - create mode 100644 third_party/rust/packed_simd/src/api/cast/v64.rs - create mode 100644 third_party/rust/packed_simd/src/api/cmp.rs - create mode 100644 third_party/rust/packed_simd/src/api/cmp/eq.rs - create mode 100644 third_party/rust/packed_simd/src/api/cmp/ord.rs - create mode 100644 third_party/rust/packed_simd/src/api/cmp/partial_eq.rs - create mode 100644 third_party/rust/packed_simd/src/api/cmp/partial_ord.rs - create mode 100644 third_party/rust/packed_simd/src/api/cmp/vertical.rs - create mode 100644 third_party/rust/packed_simd/src/api/default.rs - create mode 100644 third_party/rust/packed_simd/src/api/fmt.rs - create mode 100644 third_party/rust/packed_simd/src/api/fmt/binary.rs - create mode 100644 third_party/rust/packed_simd/src/api/fmt/debug.rs - create mode 100644 third_party/rust/packed_simd/src/api/fmt/lower_hex.rs - create mode 100644 third_party/rust/packed_simd/src/api/fmt/octal.rs - create mode 100644 
third_party/rust/packed_simd/src/api/fmt/upper_hex.rs - create mode 100644 third_party/rust/packed_simd/src/api/from.rs - create mode 100644 third_party/rust/packed_simd/src/api/from/from_array.rs - create mode 100644 third_party/rust/packed_simd/src/api/from/from_vector.rs - create mode 100644 third_party/rust/packed_simd/src/api/hash.rs - create mode 100644 third_party/rust/packed_simd/src/api/into_bits.rs - create mode 100644 third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs - create mode 100644 third_party/rust/packed_simd/src/api/into_bits/macros.rs - create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v128.rs - create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v16.rs - create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v256.rs - create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v32.rs - create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v512.rs - create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v64.rs - create mode 100644 third_party/rust/packed_simd/src/api/math.rs - create mode 100644 third_party/rust/packed_simd/src/api/math/float.rs - create mode 100644 third_party/rust/packed_simd/src/api/math/float/abs.rs - create mode 100644 third_party/rust/packed_simd/src/api/math/float/consts.rs - create mode 100644 third_party/rust/packed_simd/src/api/math/float/cos.rs - create mode 100644 third_party/rust/packed_simd/src/api/math/float/exp.rs - create mode 100644 third_party/rust/packed_simd/src/api/math/float/ln.rs - create mode 100644 third_party/rust/packed_simd/src/api/math/float/mul_add.rs - create mode 100644 third_party/rust/packed_simd/src/api/math/float/mul_adde.rs - create mode 100644 third_party/rust/packed_simd/src/api/math/float/powf.rs - create mode 100644 third_party/rust/packed_simd/src/api/math/float/recpre.rs - create mode 100644 third_party/rust/packed_simd/src/api/math/float/rsqrte.rs - create mode 100644 
third_party/rust/packed_simd/src/api/math/float/sin.rs - create mode 100644 third_party/rust/packed_simd/src/api/math/float/sqrt.rs - create mode 100644 third_party/rust/packed_simd/src/api/math/float/sqrte.rs - create mode 100644 third_party/rust/packed_simd/src/api/minimal.rs - create mode 100644 third_party/rust/packed_simd/src/api/minimal/iuf.rs - create mode 100644 third_party/rust/packed_simd/src/api/minimal/mask.rs - create mode 100644 third_party/rust/packed_simd/src/api/minimal/ptr.rs - create mode 100644 third_party/rust/packed_simd/src/api/ops.rs - create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs - create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs - create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs - create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs - create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs - create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs - create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs - create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs - create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs - create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_neg.rs - create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_rotates.rs - create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_shifts.rs - create mode 100644 third_party/rust/packed_simd/src/api/ptr.rs - create mode 100644 third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs - create mode 100644 third_party/rust/packed_simd/src/api/reductions.rs - create mode 100644 third_party/rust/packed_simd/src/api/reductions/bitwise.rs - create mode 100644 third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs - create mode 100644 
third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs - create mode 100644 third_party/rust/packed_simd/src/api/reductions/mask.rs - create mode 100644 third_party/rust/packed_simd/src/api/reductions/min_max.rs - create mode 100644 third_party/rust/packed_simd/src/api/select.rs - create mode 100644 third_party/rust/packed_simd/src/api/shuffle.rs - create mode 100644 third_party/rust/packed_simd/src/api/shuffle1_dyn.rs - create mode 100644 third_party/rust/packed_simd/src/api/slice.rs - create mode 100644 third_party/rust/packed_simd/src/api/slice/from_slice.rs - create mode 100644 third_party/rust/packed_simd/src/api/slice/write_to_slice.rs - create mode 100644 third_party/rust/packed_simd/src/api/swap_bytes.rs - create mode 100644 third_party/rust/packed_simd/src/codegen.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/bit_manip.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/llvm.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/abs.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/cos.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/exp.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/ln.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/macros.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/powf.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs - 
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/reductions.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/shuffle.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/swap_bytes.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/v128.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/v16.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/v256.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/v32.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/v512.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/v64.rs - create mode 100644 third_party/rust/packed_simd/src/codegen/vPtr.rs 
- create mode 100644 third_party/rust/packed_simd/src/codegen/vSize.rs - create mode 100644 third_party/rust/packed_simd/src/lib.rs - create mode 100644 third_party/rust/packed_simd/src/masks.rs - create mode 100644 third_party/rust/packed_simd/src/sealed.rs - create mode 100644 third_party/rust/packed_simd/src/testing.rs - create mode 100644 third_party/rust/packed_simd/src/testing/macros.rs - create mode 100644 third_party/rust/packed_simd/src/testing/utils.rs - create mode 100644 third_party/rust/packed_simd/src/v128.rs - create mode 100644 third_party/rust/packed_simd/src/v16.rs - create mode 100644 third_party/rust/packed_simd/src/v256.rs - create mode 100644 third_party/rust/packed_simd/src/v32.rs - create mode 100644 third_party/rust/packed_simd/src/v512.rs - create mode 100644 third_party/rust/packed_simd/src/v64.rs - create mode 100644 third_party/rust/packed_simd/src/vPtr.rs - create mode 100644 third_party/rust/packed_simd/src/vSize.rs - create mode 100644 third_party/rust/packed_simd/tests/endianness.rs - delete mode 100644 third_party/rust/simd/.cargo-checksum.json - delete mode 100644 third_party/rust/simd/Cargo.toml - delete mode 100644 third_party/rust/simd/README.md - delete mode 100755 third_party/rust/simd/benches/mandelbrot.rs - delete mode 100755 third_party/rust/simd/benches/matrix.rs - delete mode 100644 third_party/rust/simd/build.rs - delete mode 100755 third_party/rust/simd/examples/axpy.rs - delete mode 100644 third_party/rust/simd/examples/convert.rs - delete mode 100755 third_party/rust/simd/examples/dot-product.rs - delete mode 100644 third_party/rust/simd/examples/fannkuch-redux-nosimd.rs - delete mode 100755 third_party/rust/simd/examples/fannkuch-redux.rs - delete mode 100755 third_party/rust/simd/examples/mandelbrot.rs - delete mode 100644 third_party/rust/simd/examples/matrix-inverse.rs - delete mode 100644 third_party/rust/simd/examples/nbody-nosimd.rs - delete mode 100755 third_party/rust/simd/examples/nbody.rs - delete mode 
100644 third_party/rust/simd/examples/ops.rs - delete mode 100644 third_party/rust/simd/examples/spectral-norm-nosimd.rs - delete mode 100755 third_party/rust/simd/examples/spectral-norm.rs - delete mode 100644 third_party/rust/simd/src/aarch64/mod.rs - delete mode 100644 third_party/rust/simd/src/aarch64/neon.rs - delete mode 100644 third_party/rust/simd/src/arm/mod.rs - delete mode 100644 third_party/rust/simd/src/arm/neon.rs - delete mode 100644 third_party/rust/simd/src/common.rs - delete mode 100644 third_party/rust/simd/src/lib.rs - delete mode 100644 third_party/rust/simd/src/sixty_four.rs - delete mode 100644 third_party/rust/simd/src/v256.rs - delete mode 100644 third_party/rust/simd/src/x86/avx.rs - delete mode 100644 third_party/rust/simd/src/x86/avx2.rs - delete mode 100644 third_party/rust/simd/src/x86/mod.rs - delete mode 100644 third_party/rust/simd/src/x86/sse2.rs - delete mode 100644 third_party/rust/simd/src/x86/sse3.rs - delete mode 100644 third_party/rust/simd/src/x86/sse4_1.rs - delete mode 100644 third_party/rust/simd/src/x86/sse4_2.rs - delete mode 100644 third_party/rust/simd/src/x86/ssse3.rs - -diff --git a/.cargo/config.in b/.cargo/config.in -index 94f5732891fb..57ae36311e52 100644 ---- a/.cargo/config.in -+++ b/.cargo/config.in -@@ -17,6 +17,11 @@ git = "https://github.com/froydnj/winapi-rs" - branch = "aarch64" - replace-with = "vendored-sources" - -+[source."https://github.com/rust-lang-nursery/packed_simd"] -+git = "https://github.com/hsivonen/packed_simd" -+branch = "rust_1_32" -+replace-with = "vendored-sources" -+ - [source.vendored-sources] - directory = '@top_srcdir@/third_party/rust' - -diff --git a/Cargo.lock b/Cargo.lock -index 8896cfeddb28..a048a5522ab0 100644 ---- a/Cargo.lock -+++ b/Cargo.lock -@@ -141,7 +141,7 @@ version = "0.3.9" - source = "registry+https://github.com/rust-lang/crates.io-index" - dependencies = [ - "backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)", -- "cfg-if 0.1.2 
(registry+https://github.com/rust-lang/crates.io-index)", -+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc-demangle 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (git+https://github.com/froydnj/winapi-rs?branch=aarch64)", -@@ -218,7 +218,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" - dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "cexpr 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", -- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "clang-sys 0.26.1 (registry+https://github.com/rust-lang/crates.io-index)", - "clap 2.31.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", -@@ -372,7 +372,7 @@ dependencies = [ - - [[package]] - name = "cfg-if" --version = "0.1.2" -+version = "0.1.6" - source = "registry+https://github.com/rust-lang/crates.io-index" - - [[package]] -@@ -575,41 +575,41 @@ version = "0.3.1" - source = "registry+https://github.com/rust-lang/crates.io-index" - dependencies = [ - "arrayvec 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", -- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", - "scopeguard 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - ] - - [[package]] - name = "crossbeam-epoch" - version = 
"0.4.3" - source = "registry+https://github.com/rust-lang/crates.io-index" - dependencies = [ - "arrayvec 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", -- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "scopeguard 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - ] - - [[package]] - name = "crossbeam-utils" - version = "0.2.2" - source = "registry+https://github.com/rust-lang/crates.io-index" - dependencies = [ -- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - ] - - [[package]] - name = "crossbeam-utils" - version = "0.3.2" - source = "registry+https://github.com/rust-lang/crates.io-index" - dependencies = [ -- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - ] - - [[package]] -@@ -848,25 +848,25 @@ name = "encoding_c" - version = "0.9.0" - source = "registry+https://github.com/rust-lang/crates.io-index" - dependencies = [ -- "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", -+ "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", - ] - - [[package]] - name = "encoding_glue" - version = "0.1.0" - dependencies = [ -- "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", -+ "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", - "nserror 0.1.0", - "nsstring 0.1.0", - ] - - [[package]] - name = "encoding_rs" --version = "0.8.14" -+version = "0.8.16" - source = 
"registry+https://github.com/rust-lang/crates.io-index" - dependencies = [ -- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -- "simd 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", -+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", -+ "packed_simd 0.3.3 (git+https://github.com/hsivonen/packed_simd?branch=rust_1_32)", - ] - - [[package]] -@@ -1477,7 +1477,7 @@ name = "log" - version = "0.4.6" - source = "registry+https://github.com/rust-lang/crates.io-index" - dependencies = [ -- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - ] - - [[package]] -@@ -1719,7 +1719,7 @@ name = "net2" - version = "0.2.32" - source = "registry+https://github.com/rust-lang/crates.io-index" - dependencies = [ -- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (git+https://github.com/froydnj/winapi-rs?branch=aarch64)", - ] -@@ -1773,7 +1773,7 @@ name = "nsstring" - version = "0.1.0" - dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", -- "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", -+ "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", - ] - - [[package]] -@@ -1859,6 +1859,14 @@ dependencies = [ - "stable_deref_trait 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - ] - -+[[package]] -+name = "packed_simd" -+version = "0.3.3" -+source = "git+https://github.com/hsivonen/packed_simd?branch=rust_1_32#3541e3818fdc7c2a24f87e3459151a4ce955a67a" -+dependencies = [ -+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", -+] -+ - [[package]] - name = "parking_lot" - version = "0.6.3" -@@ -2354,11 +2362,6 @@ 
dependencies = [ - "opaque-debug 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - ] - --[[package]] --name = "simd" --version = "0.2.3" --source = "registry+https://github.com/rust-lang/crates.io-index" -- - [[package]] - name = "siphasher" - version = "0.2.1" -@@ -2936,7 +2939,7 @@ name = "uuid" - version = "0.6.5" - source = "registry+https://github.com/rust-lang/crates.io-index" - dependencies = [ -- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - ] - -@@ -3017,7 +3020,7 @@ dependencies = [ - "bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)", -- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "core-foundation 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", - "core-graphics 0.17.1 (registry+https://github.com/rust-lang/crates.io-index)", - "core-text 13.0.0 (registry+https://github.com/rust-lang/crates.io-index)", -@@ -3253,7 +3256,7 @@ dependencies = [ - "checksum cast 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "926013f2860c46252efceabb19f4a6b308197505082c609025aa6706c011d427" - "checksum cc 1.0.23 (registry+https://github.com/rust-lang/crates.io-index)" = "c37f0efaa4b9b001fa6f02d4b644dee4af97d3414df07c51e3e4f015f3a3e131" - "checksum cexpr 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8fc0086be9ca82f7fc89fc873435531cb898b86e850005850de1f820e2db6e9b" --"checksum cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de" -+"checksum cfg-if 0.1.6 
(registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" - "checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" - "checksum clang-sys 0.26.1 (registry+https://github.com/rust-lang/crates.io-index)" = "481e42017c1416b1c0856ece45658ecbb7c93d8a93455f7e5fa77f3b35455557" - "checksum clap 2.31.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f0f16b89cbb9ee36d87483dc939fe9f1e13c05898d56d7b230a0d4dff033a536" -@@ -3303,7 +3306,7 @@ dependencies = [ - "checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a" - "checksum ena 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "88dc8393b3c7352f94092497f6b52019643e493b6b890eb417cdb7c46117e621" - "checksum encoding_c 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "769ecb8b33323998e482b218c0d13cd64c267609023b4b7ec3ee740714c318ee" --"checksum encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7" -+"checksum encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)" = "0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73" - "checksum env_logger 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0561146661ae44c579e993456bc76d11ce1e0c7d745e57b2fa7146b6e49fa2ad" - "checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3" - "checksum euclid 0.19.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d1a7698bdda3d7444a79d33bdc96e8b518d44ea3ff101d8492a6ca1207b886ea" -@@ -3388,6 +3391,7 @@ dependencies = [ - "checksum ordered-float 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"2f0015e9e8e28ee20c581cfbfe47c650cedeb9ed0721090e0b7ebb10b9cdbcc2" - "checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063" - "checksum owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13" -+"checksum packed_simd 0.3.3 (git+https://github.com/hsivonen/packed_simd?branch=rust_1_32)" = "" - "checksum parking_lot 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "69376b761943787ebd5cc85a5bc95958651a22609c5c1c2b65de21786baec72b" - "checksum parking_lot_core 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "4db1a8ccf734a7bce794cc19b3df06ed87ab2f3907036b693c68f56b4d4537fa" - "checksum peeking_take_while 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" -@@ -3441,7 +3445,6 @@ dependencies = [ - "checksum serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "44dd2cfde475037451fa99b7e5df77aa3cfd1536575fa8e7a538ab36dcde49ae" - "checksum sha2 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9eb6be24e4c23a84d7184280d2722f7f2731fcdd4a9d886efbfe4413e4847ea0" - "checksum sha2 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b4d8bfd0e469f417657573d8451fb33d16cfe0989359b93baf3a1ffc639543d" --"checksum simd 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0048b17eb9577ac545c61d85c3559b41dfb4cbea41c9bd9ca6a4f73ff05fda84" - "checksum siphasher 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2ffc669b726f2bc9a3bcff66e5e23b56ba6bf70e22a34c3d7b6d0b3450b65b84" - "checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23" - "checksum slab 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"5f9776d6b986f77b35c6cf846c11ad986ff128fe0b2b63a3628e3755e8d3102d" -diff --git a/Cargo.toml b/Cargo.toml -index d64cbc77b53d..25859a20ecc3 100644 ---- a/Cargo.toml -+++ b/Cargo.toml -@@ -59,3 +59,4 @@ codegen-units = 1 - libudev-sys = { path = "dom/webauthn/libudev-sys" } - serde_derive = { git = "https://github.com/servo/serde", branch = "deserialize_from_enums9" } - winapi = { git = "https://github.com/froydnj/winapi-rs", branch = "aarch64" } -+packed_simd = { git = "https://github.com/hsivonen/packed_simd", branch = "rust_1_32" } -diff --git a/third_party/rust/cfg-if/.cargo-checksum.json b/third_party/rust/cfg-if/.cargo-checksum.json -index 89b14a227216..b744a21d9fd1 100644 ---- a/third_party/rust/cfg-if/.cargo-checksum.json -+++ b/third_party/rust/cfg-if/.cargo-checksum.json -@@ -1 +1 @@ --{"files":{"Cargo.toml":"764b9ce160653e841430da3919ff968b957ff811f7da42c8483c8bfc2f06be25","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"3fa9368c60bc701dea294fbacae0469188c4be1de79f82e972bb9b321776cd52","src/lib.rs":"6915169e3ca05f28e1cb0e052379d74f2496400de1240b74c56e55c2674a6560","tests/xcrate.rs":"30dcb70fbb9c96fda2b7825592558279f534776f72e2a8a0a3e26df4dedb3caa"},"package":"d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de"} -\ No newline at end of file -+{"files":{"Cargo.toml":"090d983ec20ad09e59f6b7679b48b9b54e9c0841cf2922b81cba485edcd40876","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"1cd0ebc3b30a9c9eddb0fda5515b5a52ec2b85a087328f0ee9f4d68cbb28afc2","src/lib.rs":"f02d6e295109365cf54884e5282a3e7d1e1f62857c700f23cd013e94a56bd803","tests/xcrate.rs":"30dcb70fbb9c96fda2b7825592558279f534776f72e2a8a0a3e26df4dedb3caa"},"package":"082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"} -\ No 
newline at end of file -diff --git a/third_party/rust/cfg-if/Cargo.toml b/third_party/rust/cfg-if/Cargo.toml -index 7afa063d1ef5..84c4fc7835ab 100644 ---- a/third_party/rust/cfg-if/Cargo.toml -+++ b/third_party/rust/cfg-if/Cargo.toml -@@ -1,14 +1,24 @@ -+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO -+# -+# When uploading crates to the registry Cargo will automatically -+# "normalize" Cargo.toml files for maximal compatibility -+# with all versions of Cargo and also rewrite `path` dependencies -+# to registry (e.g. crates.io) dependencies -+# -+# If you believe there's an error in this file please file an -+# issue against the rust-lang/cargo repository. If you're -+# editing this file be aware that the upstream Cargo.toml -+# will likely look very different (and much more reasonable) -+ - [package] - name = "cfg-if" --version = "0.1.2" -+version = "0.1.6" - authors = ["Alex Crichton "] --license = "MIT/Apache-2.0" -+description = "A macro to ergonomically define an item depending on a large number of #[cfg]\nparameters. Structured like an if-else chain, the first matching branch is the\nitem that gets emitted.\n" -+homepage = "https://github.com/alexcrichton/cfg-if" -+documentation = "https://docs.rs/cfg-if" - readme = "README.md" -+license = "MIT/Apache-2.0" - repository = "https://github.com/alexcrichton/cfg-if" --homepage = "https://github.com/alexcrichton/cfg-if" --documentation = "http://alexcrichton.com/cfg-if" --description = """ --A macro to ergonomically define an item depending on a large number of #[cfg] --parameters. Structured like an if-else chain, the first matching branch is the --item that gets emitted. 
--""" -+[badges.travis-ci] -+repository = "alexcrichton/cfg-if" -diff --git a/third_party/rust/cfg-if/README.md b/third_party/rust/cfg-if/README.md -index e9859dadb609..344a946c0487 100644 ---- a/third_party/rust/cfg-if/README.md -+++ b/third_party/rust/cfg-if/README.md -@@ -2,7 +2,7 @@ - - [![Build Status](https://travis-ci.org/alexcrichton/cfg-if.svg?branch=master)](https://travis-ci.org/alexcrichton/cfg-if) - --[Documentation](http://alexcrichton.com/cfg-if) -+[Documentation](https://docs.rs/cfg-if) - - A macro to ergonomically define an item depending on a large number of #[cfg] - parameters. Structured like an if-else chain, the first matching branch is the -@@ -36,9 +36,17 @@ fn main() { - - # License - --`cfg-if` is primarily distributed under the terms of both the MIT license and --the Apache License (Version 2.0), with portions covered by various BSD-like --licenses. -+This project is licensed under either of - --See LICENSE-APACHE, and LICENSE-MIT for details. -+ * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or -+ http://www.apache.org/licenses/LICENSE-2.0) -+ * MIT license ([LICENSE-MIT](LICENSE-MIT) or -+ http://opensource.org/licenses/MIT) - -+at your option. -+ -+### Contribution -+ -+Unless you explicitly state otherwise, any contribution intentionally submitted -+for inclusion in Serde by you, as defined in the Apache-2.0 license, shall be -+dual licensed as above, without any additional terms or conditions. -diff --git a/third_party/rust/cfg-if/src/lib.rs b/third_party/rust/cfg-if/src/lib.rs -index 563cda81f42d..ff144f69f862 100644 ---- a/third_party/rust/cfg-if/src/lib.rs -+++ b/third_party/rust/cfg-if/src/lib.rs -@@ -1,81 +1,90 @@ --//! A macro for defining #[cfg] if-else statements. -+//! A macro for defining `#[cfg]` if-else statements. - //! - //! The macro provided by this crate, `cfg_if`, is similar to the `if/elif` C - //! preprocessor macro by allowing definition of a cascade of `#[cfg]` cases, - //! 
emitting the implementation which matches first. - //! --//! This allows you to conveniently provide a long list #[cfg]'d blocks of code -+//! This allows you to conveniently provide a long list `#[cfg]`'d blocks of code - //! without having to rewrite each clause multiple times. - //! - //! # Example - //! - //! ``` - //! #[macro_use] - //! extern crate cfg_if; - //! - //! cfg_if! { - //! if #[cfg(unix)] { - //! fn foo() { /* unix specific functionality */ } - //! } else if #[cfg(target_pointer_width = "32")] { - //! fn foo() { /* non-unix, 32-bit functionality */ } - //! } else { - //! fn foo() { /* fallback implementation */ } - //! } - //! } - //! - //! # fn main() {} - //! ``` - - #![no_std] - --#![doc(html_root_url = "http://alexcrichton.com/cfg-if")] -+#![doc(html_root_url = "https://docs.rs/cfg-if")] - #![deny(missing_docs)] - #![cfg_attr(test, deny(warnings))] - --#[macro_export] -+#[macro_export(local_inner_macros)] - macro_rules! cfg_if { -+ // match if/else chains with a final `else` - ($( - if #[cfg($($meta:meta),*)] { $($it:item)* } - ) else * else { - $($it2:item)* - }) => { -- __cfg_if_items! { -+ cfg_if! { -+ @__items - () ; - $( ( ($($meta),*) ($($it)*) ), )* - ( () ($($it2)*) ), - } - }; -+ -+ // match if/else chains lacking a final `else` - ( - if #[cfg($($i_met:meta),*)] { $($i_it:item)* } - $( - else if #[cfg($($e_met:meta),*)] { $($e_it:item)* } - )* - ) => { -- __cfg_if_items! { -+ cfg_if! { -+ @__items - () ; - ( ($($i_met),*) ($($i_it)*) ), - $( ( ($($e_met),*) ($($e_it)*) ), )* - ( () () ), - } -- } --} -+ }; - --#[macro_export] --#[doc(hidden)] --macro_rules! __cfg_if_items { -- (($($not:meta,)*) ; ) => {}; -- (($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => { -- __cfg_if_apply! { cfg(all($($m,)* not(any($($not),*)))), $($it)* } -- __cfg_if_items! 
{ ($($not,)* $($m,)*) ; $($rest)* } -- } --} -+ // Internal and recursive macro to emit all the items -+ // -+ // Collects all the negated cfgs in a list at the beginning and after the -+ // semicolon is all the remaining items -+ (@__items ($($not:meta,)*) ; ) => {}; -+ (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => { -+ // Emit all items within one block, applying an approprate #[cfg]. The -+ // #[cfg] will require all `$m` matchers specified and must also negate -+ // all previous matchers. -+ cfg_if! { @__apply cfg(all($($m,)* not(any($($not),*)))), $($it)* } - --#[macro_export] --#[doc(hidden)] --macro_rules! __cfg_if_apply { -- ($m:meta, $($it:item)*) => { -+ // Recurse to emit all other items in `$rest`, and when we do so add all -+ // our `$m` matchers to the list of `$not` matchers as future emissions -+ // will have to negate everything we just matched as well. -+ cfg_if! { @__items ($($not,)* $($m,)*) ; $($rest)* } -+ }; -+ -+ // Internal macro to Apply a cfg attribute to a list of items -+ (@__apply $m:meta, $($it:item)*) => { - $(#[$m] $it)* -- } -+ }; - } - - #[cfg(test)] -diff --git a/third_party/rust/encoding_rs/.cargo-checksum.json b/third_party/rust/encoding_rs/.cargo-checksum.json -index c063d4c27534..7c1901dce515 100644 ---- a/third_party/rust/encoding_rs/.cargo-checksum.json -+++ b/third_party/rust/encoding_rs/.cargo-checksum.json -@@ -1 +1 @@ 
--{"files":{"CONTRIBUTING.md":"06c26277e8dbd3f57be2eb51b5e3285dc1cbbf8c11326df413868ae702e6a61c","COPYRIGHT":"8b98376eb373dcf81950474efe34b5576a8171460dff500cc58a1ed8d160cd57","Cargo.toml":"f4c9b33981fe222ef322d640f5ef680828d75dcd534b8aa2bfdd576598deea64","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"f2ad48641d9c997d9ae3b95d93d1cd6e1ab12ab4c44de89937c7bfabbd076a4a","README.md":"ad140c9178067c8bdba8ae43ddffd0506d70d49474731247a050ff99a3ff7832","build.rs":"f5defca2c68b73e8723f489a9279af4fbe9724abc6e9abf58d32542e8a459e26","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd
77776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic
.txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"92ddec35a834b6bc815fffffe6d07d9938a90d3c4526298637d8624410d83078","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"800cfbe3036d0c97ce27e07a4fd05edbcb7354ebec20903d81c76136d734931c","src/big5.rs":"1c94b35813314775c3fa1b10923cf8e8f8eba8c465d9833ad4293594e16c17f2","src/data.rs":"9544c019c7360a669bd3adaa90b70331124abd1df59841db66e74912bcdb96a5","src/euc_jp.rs":"0842e4f564a36051c6b85c47bbb652efae2f2926e91491daf77e4ceeecb18163","src/euc_kr.rs":"8e68590efa65485583bf57cae44ebf6de535bac1d37232e7f0307a38425fb992","src/gb18030.rs":"d269efb5e5d175f9d2ecf01d5606955a284b6f00749bb0ee23d3412c83aa3d59","src/handles.rs":"71aa7de1c5236a34ea0a8bb85332987751d2466b756fca6b3f6ac0da765cf91e","src/iso_2022_jp.rs":"3adc380736f24a5de36bc1cf81049bbe64473de10e6f12774195e6213c27c322","src/lib.rs":"e786de9e92e5652bc200266cf318753eea869e8971857cc0caa65a3cfe687545","src/macros.rs":"c7a019fd81d31de77569036ac36fd4e404b3f20144bbf79747faf4ea21538d09","src/mem.rs":"f412f60f2d4afb7e32ffba94dc5f93716e6ae9f065799ca17bb1f1b2145f6ee4","src/replacement.rs":"182c2093a6edb162183ca5990554fd7b199d3011924a8d80d894ba98ee7c479e","src/shift_jis.rs":"1c0c69ba6c123fcf720276646074660193bf9e6fa4327fe0d739a3e67874e081","src/simd_funcs.rs":"565ceeffe81173b85700c55c396ab72068751ef809bea8e1cb1e6c7919f5a905","src/single_byte.rs":"383d325dedbf3295acd50d880db1cecc29b69efe332ae2a37367cf40bf138ac4","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_
data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab
00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"b299d27055f3b068de66cc10a75c024b881c48bc093627c01e0b1f8bd7d94666","src/utf_16.rs":"1ec4e1c8ed7e42e4de401c6d0f64c2835bd80c2a306f358959957d30e6ff1501","src/utf_8.rs":"f639fc5dccd5dcc2458936baa942237d0fd58ac398c83ea3f48e51dceb5b6a81","src/variant.rs":"619a8e604d2febe6a874e3ad73cddf3ef9e6011480aecf86f23708b313415251","src/x_user_defined.rs":"ab26ea900c8f7b7a4d1172872b7ca4bc573bc60b7b1979c93aafdfb86b2c2235"},"package":"a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7"} -\ No newline at end of file -+{"files":{"CONTRIBUTING.md":"06c26277e8dbd3f57be2eb51b5e3285dc1cbbf8c11326df413868ae702e6a61c","COPYRIGHT":"8b98376eb373dcf81950474efe34b5576a8171460dff500cc58a1ed8d160cd57","Cargo.toml":"fd56e8d662553f0cc559f8ef7097effefbc815ac3485799b37dee9df08ec803c","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"f2ad48641d9c997d9ae3b95d93d1cd6e1ab12ab4c44de89937c7bfabbd076a4a","README.md":"8ae2a3548dee23c19e20564a90e2fd0dfa600cf4c2dfcc538f3455f4462d7133","build.rs":"82747097b0bb8999cdaf689a9e46195f6df5d691ee90bcde8a7b79f16bd976f0","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e
2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd77776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d5
8710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic.txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"92ddec35a834b6bc815fffffe6d07d9938a90d3c4526298637d8624410d83078","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"800cfbe3036d0c97ce27e07a4fd05edbcb7354ebec20903d81c76136d734931c","src/big5.rs":"1c94b35813314775c3fa1b10923cf8e8f8eba8c465d9833ad4293594e16c17f2","src/data.rs":"9544c019c7360a669bd3adaa90b70331124abd1df59841db66e74912bcdb96a5","src/euc_jp.rs":"0842e4f564a36051c6b85c47bbb652efae2f2926e91491daf77e4ceeecb18163","src/euc_kr.rs":"8e68590efa65485583bf57cae44ebf6de535bac1d37232e7f0307a38425fb992","src/gb18030.rs":"d269efb5e5d175f9d2ecf01d5606955a284b6f00749bb0ee23d3412c83aa3d59","src/handles.rs":"0646bd091892ff7a76f34efccda4e5ddabe1e624e890baa9fdc9d48011d2d38b","src/iso_2022_jp.rs":"3adc380736f24a5de36bc1cf81049bbe64473de10e6f12774195e6213c27c322","src/lib.rs":"e2917fb9f605662ec4705d8c0b3c179f2264697a761191c3ec8101748cf717dc","src/macros.rs":"c7a019fd81d31de77569036ac36fd4e404b3f20144bbf7974
7faf4ea21538d09","src/mem.rs":"5498de31e816f51348b8d298d4fc9568da6b0b9363146f87ca5503131d33397f","src/replacement.rs":"182c2093a6edb162183ca5990554fd7b199d3011924a8d80d894ba98ee7c479e","src/shift_jis.rs":"1c0c69ba6c123fcf720276646074660193bf9e6fa4327fe0d739a3e67874e081","src/simd_funcs.rs":"857e61c1bda9d65286c23a6c3910d6814680bbc3064bf0ff92de5bc4f3edb6f3","src/single_byte.rs":"383d325dedbf3295acd50d880db1cecc29b69efe332ae2a37367cf40bf138ac4","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a2
0116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"b299d27055f3b068de66cc10a75c024b881c48bc093627c01e0b1f8bd7d94666","src/utf_16.rs":"1ec4e1c8ed7e42e4de401c6d0f64c2835bd80c2a306f358959957d30e6ff1501","src/utf_8.rs":"f639fc5dccd5dcc2458936baa942237d0fd58ac398c83ea3f48e51dceb5b6a81","src/variant.rs":"619a8e604d2febe6a874e3ad73cddf3ef9e6011480aecf86f23708b313415251","src/x_user_defined.rs":"da51def859b870ced29cb87987f02d27b220eac0f222876cb72a1dc616f9d8ec"},"package":"0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73"} -\ No newline at end of file -diff --git a/third_party/rust/encoding_rs/Cargo.toml b/third_party/rust/encoding_rs/Cargo.toml -index 65fc8e8dffcd..e29f19fb9afe 100644 ---- a/third_party/rust/encoding_rs/Cargo.toml -+++ b/third_party/rust/encoding_rs/Cargo.toml -@@ -12,47 +12,47 @@ - - [package] - name = "encoding_rs" --version = 
"0.8.14" -+version = "0.8.16" - authors = ["Henri Sivonen "] - description = "A Gecko-oriented implementation of the Encoding Standard" - homepage = "https://docs.rs/encoding_rs/" - documentation = "https://docs.rs/encoding_rs/" - readme = "README.md" - keywords = ["encoding", "web", "unicode", "charset"] - categories = ["text-processing", "encoding", "web-programming", "internationalization"] - license = "MIT/Apache-2.0" - repository = "https://github.com/hsivonen/encoding_rs" - [profile.release] - lto = true - [dependencies.cfg-if] - version = "0.1.0" - -+[dependencies.packed_simd] -+version = "0.3.3" -+optional = true -+ - [dependencies.serde] - version = "1.0" - optional = true -- --[dependencies.simd] --version = "0.2.3" --optional = true - [dev-dependencies.bincode] - version = "0.8" - - [dev-dependencies.serde_derive] - version = "1.0" - - [dev-dependencies.serde_json] - version = "1.0" - - [features] - fast-big5-hanzi-encode = [] - fast-gb-hanzi-encode = [] - fast-hangul-encode = [] - fast-hanja-encode = [] - fast-kanji-encode = [] - fast-legacy-encode = ["fast-hangul-encode", "fast-hanja-encode", "fast-kanji-encode", "fast-gb-hanzi-encode", "fast-big5-hanzi-encode"] - less-slow-big5-hanzi-encode = [] - less-slow-gb-hanzi-encode = [] - less-slow-kanji-encode = [] --simd-accel = ["simd"] -+simd-accel = ["packed_simd", "packed_simd/into_bits"] - [badges.travis-ci] - repository = "hsivonen/encoding_rs" -diff --git a/third_party/rust/encoding_rs/README.md b/third_party/rust/encoding_rs/README.md -index 3446efd0bb43..8a72b515450e 100644 ---- a/third_party/rust/encoding_rs/README.md -+++ b/third_party/rust/encoding_rs/README.md -@@ -126,17 +126,39 @@ There are currently these optional cargo features: - - ### `simd-accel` - --Enables SSE2 acceleration on x86 and x86_64 and NEON acceleration on Aarch64 --and ARMv7. 
_Enabling this cargo feature is recommended when building for x86, --x86_64, ARMv7 or Aarch64._ The intention is for the functionality enabled by --this feature to become the normal on-by-default behavior once --[portable SIMD](https://github.com/rust-lang/rfcs/pull/2366) becames part of --stable Rust. -- --Enabling this feature breaks the build unless the target is x86 with SSE2 --(Rust's default 32-bit x86 target, `i686`, has SSE2, but Linux distros may --use an x86 target without SSE2, i.e. `i586` in `rustup` terms), ARMv7 or --thumbv7 with NEON (`-C target_feature=+neon`), x86_64 or Aarch64. -+Enables SIMD acceleration using the nightly-dependent `packed_simd` crate. -+ -+This is an opt-in feature, because enabling this feature _opts out_ of Rust's -+guarantees of future compilers compiling old code (aka. "stability story"). -+ -+Currently, this has not been tested to be an improvement except for these -+targets: -+ -+* x86_64 -+* i686 -+* aarch64 -+* thumbv7neon -+ -+If you use nightly Rust, you use targets whose first component is one of the -+above, and you are prepared _to have to revise your configuration when updating -+Rust_, you should enable this feature. Otherwise, please _do not_ enable this -+feature. -+ -+_Note!_ If you are compiling for a target that does not have 128-bit SIMD -+enabled as part of the target definition and you are enabling 128-bit SIMD -+using `-C target_feature`, you need to enable the `core_arch` Cargo feature -+for `packed_simd` to compile a crates.io snapshot of `core_arch` instead of -+using the standard-library copy of `core::arch`, because the `core::arch` -+module of the pre-compiled standard library has been compiled with the -+assumption that the CPU doesn't have 128-bit SIMD. At present this applies -+mainly to 32-bit ARM targets whose first component does not include the -+substring `neon`. 
-+ -+The encoding_rs side of things has not been properly set up for POWER, -+PowerPC, MIPS, etc., SIMD at this time, so even if you were to follow -+the advice from the previous paragraph, you probably shouldn't use -+the `simd-accel` option on the less mainstream architectures at this -+time. - - Used by Firefox. - -@@ -382,6 +404,14 @@ To regenerate the generated code: - - ## Release Notes - -+### 0.8.16 -+ -+* Switch from the `simd` crate to `packed_simd`. -+ -+### 0.8.15 -+ -+* Adjust documentation for `simd-accel` (README-only release). -+ - ### 0.8.14 - - * Made UTF-16 to UTF-8 encode conversion fill the output buffer as -diff --git a/third_party/rust/encoding_rs/build.rs b/third_party/rust/encoding_rs/build.rs -index 1b7adf780010..e687878081f7 100644 ---- a/third_party/rust/encoding_rs/build.rs -+++ b/third_party/rust/encoding_rs/build.rs -@@ -1,4 +1,12 @@ - fn main() { -+ // This does not enable `RUSTC_BOOTSTRAP=1` for `packed_simd`. -+ // You still need to knowingly have a setup that makes -+ // `packed_simd` compile. Therefore, having this file on -+ // crates.io is harmless in terms of users of `encoding_rs` -+ // accidentally depending on nightly features. Having this -+ // here means that if you knowingly want this, you only -+ // need to maintain a fork of `packed_simd` without _also_ -+ // having to maintain a fork of `encoding_rs`. 
- #[cfg(feature = "simd-accel")] - println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1"); - } -diff --git a/third_party/rust/encoding_rs/src/handles.rs b/third_party/rust/encoding_rs/src/handles.rs -index d75b65d75ce3..08da62d20051 100644 ---- a/third_party/rust/encoding_rs/src/handles.rs -+++ b/third_party/rust/encoding_rs/src/handles.rs -@@ -34,7 +34,7 @@ use simd_funcs::*; - all(target_endian = "little", target_feature = "neon") - ) - ))] --use simd::u16x8; -+use packed_simd::u16x8; - - use super::DecoderResult; - use super::EncoderResult; -diff --git a/third_party/rust/encoding_rs/src/lib.rs b/third_party/rust/encoding_rs/src/lib.rs -index 912c349a0e25..23069375d6f8 100644 ---- a/third_party/rust/encoding_rs/src/lib.rs -+++ b/third_party/rust/encoding_rs/src/lib.rs -@@ -11,7 +11,7 @@ - feature = "cargo-clippy", - allow(doc_markdown, inline_always, new_ret_no_self) - )] --#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.14")] -+#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.16")] - - //! encoding_rs is a Gecko-oriented Free Software / Open Source implementation - //! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust. -@@ -665,20 +665,21 @@ - //! See the section [_UTF-16LE, UTF-16BE and Unicode Encoding Schemes_](#utf-16le-utf-16be-and-unicode-encoding-schemes) - //! for discussion about the UTF-16 family. 
- --#![cfg_attr(feature = "simd-accel", feature(platform_intrinsics, core_intrinsics))] -+#![cfg_attr(feature = "simd-accel", feature(stdsimd, core_intrinsics))] - - #[macro_use] - extern crate cfg_if; - - #[cfg(all( - feature = "simd-accel", - any( - target_feature = "sse2", - all(target_endian = "little", target_arch = "aarch64"), - all(target_endian = "little", target_feature = "neon") - ) - ))] --extern crate simd; -+#[macro_use(shuffle)] -+extern crate packed_simd; - - #[cfg(feature = "serde")] - extern crate serde; -diff --git a/third_party/rust/encoding_rs/src/mem.rs b/third_party/rust/encoding_rs/src/mem.rs -index 6cd1a4448056..c5ee605c1b13 100644 ---- a/third_party/rust/encoding_rs/src/mem.rs -+++ b/third_party/rust/encoding_rs/src/mem.rs -@@ -228,8 +228,8 @@ macro_rules! by_unit_check_simd { - cfg_if! { - if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] { - use simd_funcs::*; -- use simd::u8x16; -- use simd::u16x8; -+ use packed_simd::u8x16; -+ use packed_simd::u16x8; - - const SIMD_ALIGNMENT: usize = 16; - -@@ -631,47 +631,42 @@ cfg_if! { - /// - /// May read the entire buffer even if it isn't all-ASCII. (I.e. the function - /// is not guaranteed to fail fast.) --#[inline] - pub fn is_ascii(buffer: &[u8]) -> bool { - is_ascii_impl(buffer) - } - - /// Checks whether the buffer is all-Basic Latin (i.e. UTF-16 representing - /// only ASCII characters). - /// - /// May read the entire buffer even if it isn't all-ASCII. (I.e. the function - /// is not guaranteed to fail fast.) --#[inline] - pub fn is_basic_latin(buffer: &[u16]) -> bool { - is_basic_latin_impl(buffer) - } - - /// Checks whether the buffer is valid UTF-8 representing only code points - /// less than or equal to U+00FF. - /// - /// Fails fast. (I.e. returns before having read the whole buffer if UTF-8 - /// invalidity or code points above U+00FF are discovered. 
--#[inline] - pub fn is_utf8_latin1(buffer: &[u8]) -> bool { - is_utf8_latin1_impl(buffer).is_none() - } - - /// Checks whether the buffer represents only code point less than or equal - /// to U+00FF. - /// - /// Fails fast. (I.e. returns before having read the whole buffer if code - /// points above U+00FF are discovered. --#[inline] - pub fn is_str_latin1(buffer: &str) -> bool { - is_str_latin1_impl(buffer).is_none() - } - - /// Checks whether the buffer represents only code point less than or equal - /// to U+00FF. - /// - /// May read the entire buffer even if it isn't all-Latin1. (I.e. the function - /// is not guaranteed to fail fast.) --#[inline] - pub fn is_utf16_latin1(buffer: &[u16]) -> bool { - is_utf16_latin1_impl(buffer) - } -@@ -1283,7 +1278,6 @@ pub fn is_str_bidi(buffer: &str) -> bool { - /// high surrogate that could be the high half of an RTL character. - /// Returns `false` if the input contains neither RTL characters nor - /// unpaired high surrogates that could be higher halves of RTL characters. --#[inline] - pub fn is_utf16_bidi(buffer: &[u16]) -> bool { - is_utf16_bidi_impl(buffer) - } -@@ -1416,67 +1410,63 @@ pub fn is_utf16_code_unit_bidi(u: u16) -> bool { - /// Returns `Latin1Bidi::Latin1` if `is_utf8_latin1()` would return `true`. - /// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf8_bidi()` would return - /// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`. --#[inline] - pub fn check_utf8_for_latin1_and_bidi(buffer: &[u8]) -> Latin1Bidi { - if let Some(offset) = is_utf8_latin1_impl(buffer) { - if is_utf8_bidi(&buffer[offset..]) { - Latin1Bidi::Bidi - } else { - Latin1Bidi::LeftToRight - } - } else { - Latin1Bidi::Latin1 - } - } - - /// Checks whether a valid UTF-8 buffer contains code points - /// that trigger right-to-left processing or is all-Latin1. - /// - /// Possibly more efficient than performing the checks separately. - /// - /// Returns `Latin1Bidi::Latin1` if `is_str_latin1()` would return `true`. 
- /// Otherwise, returns `Latin1Bidi::Bidi` if `is_str_bidi()` would return - /// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`. --#[inline] - pub fn check_str_for_latin1_and_bidi(buffer: &str) -> Latin1Bidi { - // The transition from the latin1 check to the bidi check isn't - // optimal but not tweaking it to perfection today. - if let Some(offset) = is_str_latin1_impl(buffer) { - if is_str_bidi(&buffer[offset..]) { - Latin1Bidi::Bidi - } else { - Latin1Bidi::LeftToRight - } - } else { - Latin1Bidi::Latin1 - } - } - - /// Checks whether a potentially invalid UTF-16 buffer contains code points - /// that trigger right-to-left processing or is all-Latin1. - /// - /// Possibly more efficient than performing the checks separately. - /// - /// Returns `Latin1Bidi::Latin1` if `is_utf16_latin1()` would return `true`. - /// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf16_bidi()` would return - /// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`. --#[inline] - pub fn check_utf16_for_latin1_and_bidi(buffer: &[u16]) -> Latin1Bidi { - check_utf16_for_latin1_and_bidi_impl(buffer) - } - - /// Converts potentially-invalid UTF-8 to valid UTF-16 with errors replaced - /// with the REPLACEMENT CHARACTER. - /// - /// The length of the destination buffer must be at least the length of the - /// source buffer _plus one_. - /// - /// Returns the number of `u16`s written. - /// - /// # Panics - /// - /// Panics if the destination buffer is shorter than stated above. --#[inline] - pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize { - // TODO: Can the requirement for dst to be at least one unit longer - // be eliminated? -@@ -1516,7 +1506,6 @@ pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize { - /// # Panics - /// - /// Panics if the destination buffer is shorter than stated above. 
--#[inline] - pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize { - assert!( - dst.len() >= src.len(), -@@ -1683,7 +1672,6 @@ pub fn convert_utf16_to_utf8(src: &[u16], dst: &mut [u8]) -> usize { - /// not allocating memory for the worst case up front. Specifically, - /// if the input starts with or ends with an unpaired surrogate, those are - /// replaced with the REPLACEMENT CHARACTER. --#[inline] - pub fn convert_utf16_to_str_partial(src: &[u16], dst: &mut str) -> (usize, usize) { - let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() }; - let (read, written) = convert_utf16_to_utf8_partial(src, bytes); -@@ -1727,7 +1715,6 @@ pub fn convert_utf16_to_str(src: &[u16], dst: &mut str) -> usize { - /// # Panics - /// - /// Panics if the destination buffer is shorter than stated above. --#[inline] - pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) { - assert!( - dst.len() >= src.len(), -@@ -1755,7 +1742,6 @@ pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) { - /// indicated by the return value, so using a `&mut str` interpreted as - /// `&mut [u8]` as the destination is not safe. If you want to convert into - /// a `&mut str`, use `convert_utf16_to_str()` instead of this function. --#[inline] - pub fn convert_latin1_to_utf8_partial(src: &[u8], dst: &mut [u8]) -> (usize, usize) { - let src_len = src.len(); - let src_ptr = src.as_ptr(); -@@ -1894,7 +1880,6 @@ pub fn convert_latin1_to_str(src: &[u8], dst: &mut str) -> usize { - /// - /// If debug assertions are enabled (and not fuzzing) and the input is - /// not in the range U+0000 to U+00FF, inclusive. --#[inline] - pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize { - assert!( - dst.len() >= src.len(), -@@ -1957,7 +1942,6 @@ pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize { - /// - /// (Probably in future versions if debug assertions are enabled (and not - /// fuzzing) and the input is not in the range U+0000 to U+00FF, inclusive.) 
--#[inline] - pub fn convert_utf16_to_latin1_lossy(src: &[u16], dst: &mut [u8]) { - assert!( - dst.len() >= src.len(), -@@ -2030,7 +2014,6 @@ pub fn encode_latin1_lossy<'a>(string: &'a str) -> Cow<'a, [u8]> { - - /// Returns the index of the first unpaired surrogate or, if the input is - /// valid UTF-16 in its entirety, the length of the input. --#[inline] - pub fn utf16_valid_up_to(buffer: &[u16]) -> usize { - utf16_valid_up_to_impl(buffer) - } -@@ -2060,61 +2043,58 @@ pub fn ensure_utf16_validity(buffer: &mut [u16]) { - /// # Panics - /// - /// Panics if the destination buffer is shorter than stated above. --#[inline] - pub fn copy_ascii_to_ascii(src: &[u8], dst: &mut [u8]) -> usize { - assert!( - dst.len() >= src.len(), - "Destination must not be shorter than the source." - ); - if let Some((_, consumed)) = - unsafe { ascii_to_ascii(src.as_ptr(), dst.as_mut_ptr(), src.len()) } - { - consumed - } else { - src.len() - } - } - - /// Copies ASCII from source to destination zero-extending it to UTF-16 up to - /// the first non-ASCII byte (or the end of the input if it is ASCII in its - /// entirety). - /// - /// The length of the destination buffer must be at least the length of the - /// source buffer. - /// - /// Returns the number of `u16`s written. - /// - /// # Panics - /// - /// Panics if the destination buffer is shorter than stated above. --#[inline] - pub fn copy_ascii_to_basic_latin(src: &[u8], dst: &mut [u16]) -> usize { - assert!( - dst.len() >= src.len(), - "Destination must not be shorter than the source." - ); - if let Some((_, consumed)) = - unsafe { ascii_to_basic_latin(src.as_ptr(), dst.as_mut_ptr(), src.len()) } - { - consumed - } else { - src.len() - } - } - - /// Copies Basic Latin from source to destination narrowing it to ASCII up to - /// the first non-Basic Latin code unit (or the end of the input if it is - /// Basic Latin in its entirety). 
- /// - /// The length of the destination buffer must be at least the length of the - /// source buffer. - /// - /// Returns the number of bytes written. - /// - /// # Panics - /// - /// Panics if the destination buffer is shorter than stated above. --#[inline] - pub fn copy_basic_latin_to_ascii(src: &[u16], dst: &mut [u8]) -> usize { - assert!( - dst.len() >= src.len(), -diff --git a/third_party/rust/encoding_rs/src/simd_funcs.rs b/third_party/rust/encoding_rs/src/simd_funcs.rs -index 0cc05baf784d..4e19b0e8a07e 100644 ---- a/third_party/rust/encoding_rs/src/simd_funcs.rs -+++ b/third_party/rust/encoding_rs/src/simd_funcs.rs -@@ -7,9 +7,9 @@ - // option. This file may not be copied, modified, or distributed - // except according to those terms. - --use simd::u16x8; --use simd::u8x16; --use simd::Simd; -+use packed_simd::u16x8; -+use packed_simd::u8x16; -+use packed_simd::FromBits; - - // TODO: Migrate unaligned access to stdlib code if/when the RFC - // https://github.com/rust-lang/rfcs/pull/1725 is implemented. -@@ -62,81 +62,79 @@ pub unsafe fn store8_aligned(ptr: *mut u16, s: u16x8) { - *(ptr as *mut u16x8) = s; - } - --extern "platform-intrinsic" { -- fn simd_shuffle16>(x: T, y: T, idx: [u32; 16]) -> U; -+cfg_if! 
{ -+ if #[cfg(all(target_feature = "sse2", target_arch = "x86_64"))] { -+ use std::arch::x86_64::__m128i; -+ use std::arch::x86_64::_mm_movemask_epi8; -+ use std::arch::x86_64::_mm_packus_epi16; -+ } else if #[cfg(all(target_feature = "sse2", target_arch = "x86"))] { -+ use std::arch::x86::__m128i; -+ use std::arch::x86::_mm_movemask_epi8; -+ use std::arch::x86::_mm_packus_epi16; -+ } else if #[cfg(target_arch = "aarch64")]{ -+ use std::arch::aarch64::uint8x16_t; -+ use std::arch::aarch64::uint16x8_t; -+ use std::arch::aarch64::vmaxvq_u8; -+ use std::arch::aarch64::vmaxvq_u16; -+ } else { -+ -+ } - } - - // #[inline(always)] - // fn simd_byte_swap_u8(s: u8x16) -> u8x16 { - // unsafe { --// simd_shuffle16(s, s, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) -+// shuffle!(s, s, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) - // } - // } - - // #[inline(always)] - // pub fn simd_byte_swap(s: u16x8) -> u16x8 { - // to_u16_lanes(simd_byte_swap_u8(to_u8_lanes(s))) - // } - - #[inline(always)] - pub fn simd_byte_swap(s: u16x8) -> u16x8 { - let left = s << 8; - let right = s >> 8; - left | right - } - - #[inline(always)] - pub fn to_u16_lanes(s: u8x16) -> u16x8 { -- unsafe { ::std::mem::transmute(s) } -+ u16x8::from_bits(s) - } - --// #[inline(always)] --// pub fn to_u8_lanes(s: u16x8) -> u8x16 { --// unsafe { ::std::mem::transmute(s) } --// } -- - cfg_if! { - if #[cfg(target_feature = "sse2")] { - -- use simd::i16x8; -- use simd::i8x16; -- extern "platform-intrinsic" { -- fn x86_mm_movemask_epi8(x: i8x16) -> i32; -- } -- - // Expose low-level mask instead of higher-level conclusion, - // because the non-ASCII case would perform less well otherwise. - #[inline(always)] - pub fn mask_ascii(s: u8x16) -> i32 { - unsafe { -- let signed: i8x16 = ::std::mem::transmute_copy(&s); -- x86_mm_movemask_epi8(signed) -+ _mm_movemask_epi8(__m128i::from_bits(s)) - } - } - - } else { - - } - } - - cfg_if! 
{ - if #[cfg(target_feature = "sse2")] { - #[inline(always)] - pub fn simd_is_ascii(s: u8x16) -> bool { - unsafe { -- let signed: i8x16 = ::std::mem::transmute_copy(&s); -- x86_mm_movemask_epi8(signed) == 0 -+ _mm_movemask_epi8(__m128i::from_bits(s)) == 0 - } - } - } else if #[cfg(target_arch = "aarch64")]{ -- extern "platform-intrinsic" { -- fn aarch64_vmaxvq_u8(x: u8x16) -> u8; -- } -- - #[inline(always)] - pub fn simd_is_ascii(s: u8x16) -> bool { - unsafe { -- aarch64_vmaxvq_u8(s) < 0x80 -+ vmaxvq_u8(uint8x16_t::from_bits(s)) < 0x80 - } - } - } else { -@@ -164,35 +162,31 @@ cfg_if! { - #[inline(always)] - pub fn simd_is_str_latin1(s: u8x16) -> bool { - unsafe { -- aarch64_vmaxvq_u8(s) < 0xC4 -+ vmaxvq_u8(uint8x16_t::from_bits(s)) < 0xC4 - } - } - } else { - #[inline(always)] - pub fn simd_is_str_latin1(s: u8x16) -> bool { - let above_str_latin1 = u8x16::splat(0xC4); - s.lt(above_str_latin1).all() - } - } - } - - cfg_if! { - if #[cfg(target_arch = "aarch64")]{ -- extern "platform-intrinsic" { -- fn aarch64_vmaxvq_u16(x: u16x8) -> u16; -- } -- - #[inline(always)] - pub fn simd_is_basic_latin(s: u16x8) -> bool { - unsafe { -- aarch64_vmaxvq_u16(s) < 0x80 -+ vmaxvq_u16(uint16x8_t::from_bits(s)) < 0x80 - } - } - - #[inline(always)] - pub fn simd_is_latin1(s: u16x8) -> bool { - unsafe { -- aarch64_vmaxvq_u16(s) < 0x100 -+ vmaxvq_u16(uint16x8_t::from_bits(s)) < 0x100 - } - } - } else { -@@ -225,7 +219,7 @@ cfg_if! { - macro_rules! 
aarch64_return_false_if_below_hebrew { - ($s:ident) => ({ - unsafe { -- if aarch64_vmaxvq_u16($s) < 0x0590 { -+ if vmaxvq_u16(uint16x8_t::from_bits($s)) < 0x0590 { - return false; - } - } -@@ -292,47 +286,38 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool { - #[inline(always)] - pub fn simd_unpack(s: u8x16) -> (u16x8, u16x8) { - unsafe { -- let first: u8x16 = simd_shuffle16( -+ let first: u8x16 = shuffle!( - s, - u8x16::splat(0), -- [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], -+ [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] - ); -- let second: u8x16 = simd_shuffle16( -+ let second: u8x16 = shuffle!( - s, - u8x16::splat(0), -- [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], -+ [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] - ); -- ( -- ::std::mem::transmute_copy(&first), -- ::std::mem::transmute_copy(&second), -- ) -+ (u16x8::from_bits(first), u16x8::from_bits(second)) - } - } - - cfg_if! { - if #[cfg(target_feature = "sse2")] { -- extern "platform-intrinsic" { -- fn x86_mm_packus_epi16(x: i16x8, y: i16x8) -> u8x16; -- } -- - #[inline(always)] - pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 { - unsafe { -- let first: i16x8 = ::std::mem::transmute_copy(&a); -- let second: i16x8 = ::std::mem::transmute_copy(&b); -- x86_mm_packus_epi16(first, second) -+ u8x16::from_bits(_mm_packus_epi16(__m128i::from_bits(a), __m128i::from_bits(b))) - } - } - } else { - #[inline(always)] - pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 { - unsafe { -- let first: u8x16 = ::std::mem::transmute_copy(&a); -- let second: u8x16 = ::std::mem::transmute_copy(&b); -- simd_shuffle16( -+ let first = u8x16::from_bits(a); -+ let second = u8x16::from_bits(b); -+ shuffle!( - first, - second, -- [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30], -+ [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] - ) - } - } -diff --git a/third_party/rust/encoding_rs/src/x_user_defined.rs b/third_party/rust/encoding_rs/src/x_user_defined.rs -index 
6d0d613fa093..2d2076987984 100644 ---- a/third_party/rust/encoding_rs/src/x_user_defined.rs -+++ b/third_party/rust/encoding_rs/src/x_user_defined.rs -@@ -14,15 +14,12 @@ use variant::*; - cfg_if! { - if #[cfg(feature = "simd-accel")] { - use simd_funcs::*; -- use simd::u16x8; -+ use packed_simd::u16x8; - - #[inline(always)] - fn shift_upper(unpacked: u16x8) -> u16x8 { - let highest_ascii = u16x8::splat(0x7F); -- let offset = u16x8::splat(0xF700); -- let mask = unpacked.gt(highest_ascii).to_repr().to_u16(); -- unpacked + (offset & mask) -- } -+ unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) } - } else { - } - } -diff --git a/third_party/rust/packed_simd/.appveyor.yml b/third_party/rust/packed_simd/.appveyor.yml -new file mode 100644 -index 000000000000..0388cee0a07b ---- /dev/null -+++ b/third_party/rust/packed_simd/.appveyor.yml -@@ -0,0 +1,59 @@ -+matrix: -+ allow_failures: -+ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/72 -+ - TARGET: i686-pc-windows-msvc -+ - TARGET: i686-pc-windows-gnu -+ - TARGET: x86_64-pc-windows-gnu -+ fast_finish: true -+ -+environment: -+ matrix: -+ - TARGET: x86_64-pc-windows-msvc -+ MSYSTEM: MINGW64 -+ NOVERIFY: "1" -+ - TARGET: x86_64-pc-windows-msvc -+ MSYSTEM: MINGW64 -+ RUSTFLAGS: "-C target-feature=+sse4.2" -+ NOVERIFY: "1" -+ - TARGET: x86_64-pc-windows-msvc -+ MSYSTEM: MINGW64 -+ RUSTFLAGS: "-C target-feature=+avx" -+ NOVERIFY: "1" -+ - TARGET: x86_64-pc-windows-msvc -+ MSYSTEM: MINGW64 -+ RUSTFLAGS: "-C target-feature=+avx2" -+ NOVERIFY: "1" -+ -+ - TARGET: i686-pc-windows-msvc -+ MSYSTEM: MINGW32 -+ NOVERIFY: "1" -+ - TARGET: i686-pc-windows-msvc -+ MSYSTEM: MINGW32 -+ RUSTFLAGS: "-C target-feature=+sse4.2" -+ NOVERIFY: "1" -+ - TARGET: i686-pc-windows-msvc -+ MSYSTEM: MINGW32 -+ RUSTFLAGS: "-C target-feature=+avx" -+ NOVERIFY: "1" -+ - TARGET: i686-pc-windows-msvc -+ MSYSTEM: MINGW32 -+ RUSTFLAGS: "-C target-feature=+avx2" -+ NOVERIFY: "1" -+ -+ - TARGET: 
x86_64-pc-windows-gnu -+ MSYSTEM: MINGW64 -+ -+ - TARGET: i686-pc-windows-gnu -+ MSYSTEM: MINGW32 -+ - TARGET: x86_64-pc-windows-gnu -+ MSYSTEM: MINGW64 -+install: -+ - ps: if (ls -r . -fi "*.rs" | sls "`t") { throw "Found tab character" } -+ - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-nightly-${env:TARGET}.exe" -FileName "rust-install.exe" -+ - ps: .\rust-install.exe /VERYSILENT /NORESTART /DIR="C:\rust" | Out-Null -+ - ps: $env:PATH="$env:PATH;C:\rust\bin" -+ - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% -+ - rustc -vV -+ - cargo -vV -+build: false -+test_script: bash -c "ci/run.sh" -diff --git a/third_party/rust/packed_simd/.cargo-checksum.json b/third_party/rust/packed_simd/.cargo-checksum.json -new file mode 100644 -index 000000000000..01afcc1efdac ---- /dev/null -+++ b/third_party/rust/packed_simd/.cargo-checksum.json -@@ -0,0 +1 @@ -+{"files":{".appveyor.yml":"f1ed01850e0d725f9498f52a1a63ddf40702ad6e0bf5b2d7c4c04d76e96794a3",".travis.yml":"e9258d9a54fdaf4cbc12405fe5993ac4497eb2b29021691dbc91b19cb9b52227","Cargo.toml":"089941ba3c89ea111cbea3cc3abdcdcf2b9d0ae0db268d7269ee38226db950e5","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","bors.toml":"dee881dc69b9b7834e4eba5d95c3ed5a416d4628815a167d6a22d4cb4fb064b8","build.rs":"f3baefc5e5bb9b250e762a1466371b922fd7ee4243c217b2d014307603c2f57a","ci/all.sh":"a23d14e10cb26a0eb719e389c30eb955fa53cddcd436890646df09af640bd2eb","ci/android-install-ndk.sh":"0f1746108cc30bf9b9ba45bcde7b19fc1a8bdf5b0258035b4eb8dc69b75efac4","ci/android-install-sdk.sh":"3490432022c5c8f5a115c084f7a9aca1626f96c0c87ffb62019228c4346b47e4","ci/android-sysimage.sh":"ebf4e5daa1f0fe1b2092b79f0f3f161c4c4275cb744e52352c4d81ab451e4c5a","ci/benchmark.sh":"b61d19ef6b90deba8fb79dee74c8b062d94844676293da346da87bb78a9a49a4","ci/deploy_and_run_on_ios_simulator.rs":"ec8ecf82d92072676aa47f0d1a3d021b60a7ae3531153ef12
d2ff4541fc294dc","ci/docker/aarch64-linux-android/Dockerfile":"ace2e7d33c87bc0f6d3962a4a3408c04557646f7f51ab99cfbf574906796b016","ci/docker/aarch64-unknown-linux-gnu/Dockerfile":"1ecdac757101d951794fb2ab0deaa278199cf25f2e08a15c7d40ff31a8556184","ci/docker/arm-linux-androideabi/Dockerfile":"370e55d3330a413a3ccf677b3afb3e0ef9018a5fab263faa97ae8ac017fc2286","ci/docker/arm-unknown-linux-gnueabi/Dockerfile":"e25d88f6c0c94aada3d2e3f08243f755feb7e869dc5dc505b3799719cb1af591","ci/docker/arm-unknown-linux-gnueabihf/Dockerfile":"f126f4c7bae8c11ab8b16df06ad997863f0838825a9c08c9899a3eedb6d570bd","ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile":"b647545c158ee480a4c581dbdc1f57833aef056c8d498acc04b573e842bf803c","ci/docker/i586-unknown-linux-gnu/Dockerfile":"0d492759017307ccf74dc2aa4a8cf6623daf3dc728c708dc2b18fa7940800cba","ci/docker/i686-unknown-linux-gnu/Dockerfile":"0d492759017307ccf74dc2aa4a8cf6623daf3dc728c708dc2b18fa7940800cba","ci/docker/mips-unknown-linux-gnu/Dockerfile":"323776469bb7b160385f3621d66e3ee14c75242f8180f916e65af048a29d4ea0","ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile":"c647f6948a9a43b0be695cbed4eac752120d0faf28e5e69c718cb10406921dab","ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile":"77bfd00cc8639509be381b394f077e39b45a00158ad61b4e1656714c714665d1","ci/docker/mipsel-unknown-linux-musl/Dockerfile":"ec5bea6c98a3b626731fdb95f9ff2d1182639c76e8fb16d3271d0fc884901524","ci/docker/powerpc-unknown-linux-gnu/Dockerfile":"4f2b662de66e83d1354f650b7077692309637f786c2ea5516c31b5c2ee10af2d","ci/docker/powerpc64-unknown-linux-gnu/Dockerfile":"a9595402b772bc365982e22a0096a8988825d90b09b5faa97ab192e76072f71d","ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile":"df3c381c157439695ae8cd10ab71664702c061e3b4ab22906a5ad6c2680acfed","ci/docker/s390x-unknown-linux-gnu/Dockerfile":"93fb44df3d7fd31ead158570667c97b5076a05c3d968af4a84bc13819a8f2db8","ci/docker/sparc64-unknown-linux-gnu/Dockerfile":"da1c39a3ff1fe22e41395fa7c8934e90b4c1788e551b9aec6e38bfd94effc437","ci/dock
er/thumbv7neon-linux-androideabi/Dockerfile":"c2decd5591bd7a09378901bef629cd944acf052eb55e4f35b79eb9cb4d62246a","ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile":"75c0c56161c7382b439de74c00de1c0e3dc9d59560cd6720976a751034b78714","ci/docker/wasm32-unknown-unknown/Dockerfile":"3e5f294bc1e004aa599086c2af49d6f3e7459fa250f5fbdd60cf67d53db78758","ci/docker/x86_64-linux-android/Dockerfile":"685040273cf350d5509e580ac451555efa19790c8723ca2af066adadc6880ad2","ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile":"44b6203d9290bfdc53d81219f0937e1110847a23dd982ec8c4de388354f01536","ci/docker/x86_64-unknown-linux-gnu/Dockerfile":"d253c86803b22da428fa9cc671a05f18d3318eca7733b8dccb4f7be1ddf524c5","ci/dox.sh":"5b61711be47a4e3dde0ddd15ba73d256ea95fd75af3897732c24db1dc7e66366","ci/linux-s390x.sh":"d6b732d7795b4ba131326aff893bca6228a7d2eb0e9402f135705413dbbe0dce","ci/linux-sparc64.sh":"c92966838b1ab7ad3b7a344833ee726aba6b647cf5952e56f0ad1ba420b13325","ci/lld-shim.rs":"3d7f71ec23a49e2b67f694a0168786f9a954dda15f5a138815d966643fd3fcc3","ci/max_line_width.sh":"0a1518bba4c9ecaa55694cb2e9930d0e19c265baabf73143f17f9cf285aaa5bb","ci/run-docker.sh":"92e036390ad9b0d16f109579df1b5ced2e72e9afea40c7d011400ebd3a2a90de","ci/run.sh":"63259e22a96ba539f53c06b1b39f53e3a78a71171652e7afc170836110ccd913","ci/run_examples.sh":"d1a23c6c35374a0678ba5114b9b8fefd8be0a79e774872a8bf0898d1baca18d0","ci/runtest-android.rs":"145a8e9799a5223975061fe7e586ade5669ee4877a7d7a4cf6b4ab48e8e36c7c","ci/setup_benchmarks.sh":"73fb981a8fdb1dcd54409d3c0fbbfb8f77a3ceabf8626a6b9bf9d21d6bc8ce72","ci/test-runner-linux":"c8aa6025cff5306f4f31d0c61dc5f9d4dd5a1d189ab613ef8d4c367c694d9ccd","contributing.md":"2cc8c9c560ae17867e69b06d09b758dbf7bc39eb774ada50a743724b10acc0a2","perf-guide/.gitignore":"fe82c7da551079d832cf74200b0b359b4df9828cb4a0416fa7384f07a2ae6a13","perf-guide/book.toml":"115a98284126c6b180178b44713314cc494f08a71662ee2ce15cf67f17a51064","perf-guide/src/SUMMARY.md":"3e03bffc991fdc2050f3d51842d72d9d21ea6abab56a3baf3
b2d5973a78b89e1","perf-guide/src/ascii.css":"29afb08833b2fe2250f0412e1fa1161a2432a0820a14953c87124407417c741a","perf-guide/src/bound_checks.md":"5e4991ff58a183ef0cd9fdc1feb4cd12d083b44bdf87393bbb0927808ef3ce7d","perf-guide/src/float-math/approx.md":"8c09032fa2d795a0c5db1775826c850d28eb2627846d0965c60ee72de63735ad","perf-guide/src/float-math/fma.md":"311076ba4b741d604a82e74b83a8d7e8c318fcbd7f64c4392d1cf5af95c60243","perf-guide/src/float-math/fp.md":"04153e775ab6e4f0d7837bcc515230d327b04edfa34c84ce9c9e10ebaeef2be8","perf-guide/src/float-math/svml.md":"0798873b8eedaeda5fed62dc91645b57c20775a02d3cd74d8bd06958f1516506","perf-guide/src/introduction.md":"9f5a19e9e6751f25d2daad39891a0cc600974527ec4c8305843f9618910671bd","perf-guide/src/prof/linux.md":"447731eb5de7d69166728fdbc5ecb0c0c9db678ea493b45a592d67dd002184c0","perf-guide/src/prof/mca.md":"f56d54f3d20e7aa4d32052186e8237b03d65971eb5d112802b442570ff11d344","perf-guide/src/prof/profiling.md":"8a650c0fd6ede0964789bb6577557eeef1d8226a896788602ce61528e260e43c","perf-guide/src/target-feature/attribute.md":"615f88dca0a707b6c416fa605435dd6e1fb5361cc639429cbf68cd87624bd78b","perf-guide/src/target-feature/features.md":"17077760ff24c006b606dd21889c53d87228f4311f3ba3a574f9afdeacd86165","perf-guide/src/target-feature/inlining.md":"7ed1d7068d8173a00d84c16cfe5871cd68b9f04f8d0cca2d01ebc84957ebf2f6","perf-guide/src/target-feature/practice.md":"c4b371842e0086df178488fec97f20def8f0c62ee588bcd25fd948b9b1fa227e","perf-guide/src/target-feature/runtime.md":"835425f5ee597fb3e51d36e725a81ebee29f4561231d19563cd4da81dbb1cfcb","perf-guide/src/target-feature/rustflags.md":"ab49712e9293a65d74d540ba4784fcb57ff1119ec05a575d895c071f1a620f64","perf-guide/src/vert-hor-ops.md":"c6211c0ee91e60552ec592d89d9d957eedc21dee3cbd89e1ad6765ea06a27471","readme.md":"585a8f0e16877fb9abb00cd17a175fcb9d7857840c6c61209f1827ffab095070","rustfmt.toml":"de6101d0670bad65fb3b337d56957d2a024e017e5ab146ec784d77312daaf8ff","src/api.rs":"331a3a4abb19cee2df5f2df4ad7c3e88b45e62cf
23fdacfc9bbaa633dc5cf788","src/api/bit_manip.rs":"e68290ee679cc5abc9c73afbe635c1035f8cbfe849e5c751a1680e459244c39e","src/api/cast.rs":"03b94a3d316ac7b7be7068810044911e965e889a0ace7bae762749ca74a92747","src/api/cast/macros.rs":"b0a14d0c83ad2ebb7a275180f6d9e3f2bc312ba57a7d3d6c39fad4e0f20f9408","src/api/cast/v128.rs":"63e28c6a3edf1a7a635f51b8d3c6adbb1d46f884d92a196b3d4a6e743d809416","src/api/cast/v16.rs":"2a584eeb57fd47baad6f3533764301b04aaaac23702b7a8db12598ac02899262","src/api/cast/v256.rs":"b91c15ed8d1536ecd97b4eb79ff9d5aba0552cd9b6f0ea6435b05f2273e23b3a","src/api/cast/v32.rs":"62ec89fcce7fa7f28497ee5770adc8f81d2d3a6b2925b02f7dc06504c40e8f38","src/api/cast/v512.rs":"d855cb943ae7106e9599ef38e30a3afb1c6bd5433178baca54cb128fd9a7d143","src/api/cast/v64.rs":"fe0f7dfaf4fc0c0c1a78c96fcfcdfdc2a1e2845843b11aa797a0c6fb52a8f774","src/api/cmp.rs":"357c3a2a09c6d4611c32dd7fa95be2fae933d513e229026ec9b44451a77b884e","src/api/cmp/eq.rs":"60f70f355bae4cb5b17db53204cacc3890f70670611c17df638d4c04f7cc8075","src/api/cmp/ord.rs":"589f7234761c294fa5df8f525bc4acd5a47cdb602207d524a0d4e19804cd9695","src/api/cmp/partial_eq.rs":"3ed23d2a930b0f9750c3a5309da766b03dc4f9c4d375b42ad3c50fe732693d15","src/api/cmp/partial_ord.rs":"e16b11805c94048acd058c93994b5bc74bb187f8d7e3b86a87df60e1601467f9","src/api/cmp/vertical.rs":"de3d62f38eba817299aa16f1e1939954c9a447e316509397465c2830852ba053","src/api/default.rs":"b61f92fc0e33a2633b3375eb405beba480da071cde03df4d437d8a6058afcd97","src/api/fmt.rs":"67fb804bb86b6cd77cf8cd492b5733ce437071b66fe3297278b8a6552c325dda","src/api/fmt/binary.rs":"35cb5c266197d6224d598fb3d286e5fe48ef0c01ed356c2ff6fe9ba946f96a92","src/api/fmt/debug.rs":"aa18eea443bf353fea3db8b1a025132bbcaf91e747ecfa43b8d9fce9af395a0c","src/api/fmt/lower_hex.rs":"69d5be366631af309f214e8031c8c20267fcc27a695eac6f45c6bc1df72a67e6","src/api/fmt/octal.rs":"9eb11ba3d990213f3c7f1ec25edba7ce997cb1320e16d308c83498ba6b9bfbd9","src/api/fmt/upper_hex.rs":"a4637d085b7bb20e759ce58e08435b510a563ba3dd468af2b03560fdc55115
62","src/api/from.rs":"2e599d8329cb05eaf06224cc441355c4b7b51254fc19256619333be8c149d444","src/api/from/from_array.rs":"4151593c7bba7455821fffa5b59867005a77c95d32f1f0cc3fd87294000157d9","src/api/from/from_vector.rs":"9764371aa9e6005aace74dea14f59e5611a095b7cf42707940924749282c52f0","src/api/hash.rs":"562cfa3f1d8eb9a733c035a3665a599c2f1e341ee820d8fbdd102a4398a441bc","src/api/into_bits.rs":"82297f0697d67b5a015e904e7e6e7b2a7066ba825bc54b94b4ff3e22d7a1eefb","src/api/into_bits/arch_specific.rs":"1f925390b0ce7132587d95f2419c6e2ad3e1a9d17eb1d9c120a1c1c4bdf4277e","src/api/into_bits/macros.rs":"d762406de25aedff88d460dec7a80dc8e825a2a419d53218ce007efa6a1d3e04","src/api/into_bits/v128.rs":"ecdc5893664c71d7ab1ff3697c3fbe490d20d8748b9b76881d05e7625e40d74c","src/api/into_bits/v16.rs":"5459ec7dad1ad7bd30dc7e48374580b993abf23701d9c3cb22203fa0a9aabb6d","src/api/into_bits/v256.rs":"90ea351da0380ead1bf0f63b620afd40d01d638d09f7e7be31840bd2c1d9c663","src/api/into_bits/v32.rs":"ee1dc5a430050e16f51154b5fe85b1536f5feddf2ea23dd1d3859b67c4afc6fc","src/api/into_bits/v512.rs":"f72098ed1c9a23944f3d01abaf5e0f2d0e81d35a06fdadd2183e896d41b59867","src/api/into_bits/v64.rs":"6394462facdfe7827349c742b7801f1291e75a720dfb8c0b52100df46f371c98","src/api/math.rs":"8b2a2fc651917a850539f993aa0b9e5bf4da67b11685285b8de8cdca311719ec","src/api/math/float.rs":"61d2794d68262a1090ae473bd30793b5f65cf732f32a6694a3af2ce5d9225616","src/api/math/float/abs.rs":"5b6b2701e2e11135b7ce58a05052ea8120e10e4702c95d046b9d21b827b26bf8","src/api/math/float/consts.rs":"78acba000d3fa527111300b6327c1932de9c4c1e02d4174e1a5615c01463d38c","src/api/math/float/cos.rs":"4c2dd7173728ef189314f1576c9486e03be21b7da98843b2f9011282a7979e31","src/api/math/float/exp.rs":"7c6d5f1e304f498a01cfa23b92380c815d7da0ad94eae3483783bc377d287eef","src/api/math/float/ln.rs":"54c7583f3df793b39ff57534fade27b41bb992439e5dc178252f5ca3190a3e54","src/api/math/float/mul_add.rs":"62cac77660d20159276d4c9ef066eb90c81cbddb808e8e157182c607625ad2eb","src/api/math/float/mul
_adde.rs":"bae056ee9f3a70df39ec3c3b2f6437c65303888a7b843ef1a5bcf1f5aca0e602","src/api/math/float/powf.rs":"9ddb938984b36d39d82a82f862f80df8f7fb013f1d222d45698d41d88472f568","src/api/math/float/recpre.rs":"589225794ff1dbf31158dff660e6d4509ecc8befbb57c633900dea5ac0b840d6","src/api/math/float/rsqrte.rs":"a32abdcc318d7ccc8448231f54d75b884b7cbeb03a7d595713ab6243036f4dbf","src/api/math/float/sin.rs":"cbd3622b7df74f19691743001c8cf747a201f8977ad90542fee915f37dcd1e49","src/api/math/float/sqrt.rs":"0c66d5d63fb08e4d99c6b82a8828e41173aff1ac9fa1a2764a11fac217ccf2ac","src/api/math/float/sqrte.rs":"731e1c9f321b662accdd27dacb3aac2e8043b7aecb2f2161dde733bd9f025362","src/api/minimal.rs":"1f22bcc528555444e76de569ec0ae2029b9ae9d04805efeafa93369c8098036b","src/api/minimal/iuf.rs":"c501a6696950cf5e521765f178de548af64fdfb6e10d026616d09fab93ca2d17","src/api/minimal/mask.rs":"42e415f536c5193d0218f5a754b34b87fd7c971bff068009f958712166ff056d","src/api/minimal/ptr.rs":"a9ee482d1dd1c956fb8f3f179e6e620b1de4e9d713961461d4c6923a4ef2e67c","src/api/ops.rs":"3e273b277a0f3019d42c3c59ca94a5afd4885d5ae6d2182e5089bbeec9de42ee","src/api/ops/scalar_arithmetic.rs":"d2d5ad897a59dd0787544f927e0e7ca4072c3e58b0f4a2324083312b0d5a21d7","src/api/ops/scalar_bitwise.rs":"482204e459ca6be79568e1c9f70adbe2d2151412ddf122fb2161be8ebb51c40c","src/api/ops/scalar_mask_bitwise.rs":"c250f52042e37b22d57256c80d4604104cfd2fbe2a2e127c676267270ca5d350","src/api/ops/scalar_shifts.rs":"987f8fdebeedc16e3d77c1b732e7826ef70633c541d16dfa290845d5c6289150","src/api/ops/vector_arithmetic.rs":"ddca15d09ddeef502c2ed66117a62300ca65d87e959e8b622d767bdf1c307910","src/api/ops/vector_bitwise.rs":"b3968f7005b649edcc22a54e2379b14d5ee19045f2e784029805781ae043b5ee","src/api/ops/vector_float_min_max.rs":"f5155dce75219f4ba11275b1f295d2fdcddd49d174a6f1fb2ace7ea42813ce41","src/api/ops/vector_int_min_max.rs":"a378789c6ff9b32a51fbd0a97ffd36ed102cd1fe6a067d2b02017c1df342def6","src/api/ops/vector_mask_bitwise.rs":"5052d18517d765415d40327e6e8e55a312daaca0a5e2
aec959bfa54b1675f9c8","src/api/ops/vector_neg.rs":"5c62f6b0221983cdbd23cd0a3af3672e6ba1255f0dfe8b19aae6fbd6503e231b","src/api/ops/vector_rotates.rs":"03cbe8a400fd7c688e4ee771a990a6754f2031b1a59b19ae81158b21471167e5","src/api/ops/vector_shifts.rs":"9bf69d0087268f61009e39aea52e03a90f378910206b6a28e8393178b6a5d0e0","src/api/ptr.rs":"8a793251bed6130dcfb2f1519ceaa18b751bbb15875928d0fb6deb5a5e07523a","src/api/ptr/gather_scatter.rs":"9ddd960365e050674b25b2fd3116e24d94669b4375d74e71c03e3f1469576066","src/api/reductions.rs":"ae5baca81352ecd44526d6c30c0a1feeda475ec73ddd3c3ec6b14e944e5448ee","src/api/reductions/bitwise.rs":"8bf910ae226188bd15fc7e125f058cd2566b6186fcd0cd8fd020f352c39ce139","src/api/reductions/float_arithmetic.rs":"e58c8c87806a95df2b2b5b48ac5991036df024096d9d7c171a480fe9282896a4","src/api/reductions/integer_arithmetic.rs":"47471da1c5f859489680bb5d34ced3d3aa20081c16053a3af121a4496fcb57bf","src/api/reductions/mask.rs":"db83327a950e33a317f37fd33ca4e20c347fb415975ec024f3e23da8509425af","src/api/reductions/min_max.rs":"f27be3aa28e1c1f46de7890198db6e12f00c207085e89ef2de7e57ee443cdb98","src/api/select.rs":"a98e2ccf9fc6bdeed32d337c8675bc96c2fbe2cc34fbf149ad6047fb8e749774","src/api/shuffle.rs":"da58200790868c09659819322a489929a5b6e56c596ed07e6a44293ea02e7d09","src/api/shuffle1_dyn.rs":"bfea5a91905b31444e9ef7ca6eddb7a9606b7e22d3f71bb842eb2795a0346620","src/api/slice.rs":"ee87484e8af329547b9a5d4f2a69e8bed6ea10bbd96270d706083843d4eea2ac","src/api/slice/from_slice.rs":"4d4fe8a329c885fcb4fbcbedf99efb15a95296fe6b3f595056cc37037450d5ac","src/api/slice/write_to_slice.rs":"f5b23b2c4b91cfb26b713a9013a6c0da7f45eaefb79ba06dcbc27f3f23bda679","src/api/swap_bytes.rs":"4a6792a2e49a77475e1b237592b4b2804dbddb79c474331acd0dd71b36934259","src/codegen.rs":"c6eebc3d3665420aa6a2f317977e3c41a4f43e0550ac630cdbe8e4bbed5e2031","src/codegen/bit_manip.rs":"5559e095105a80003e0de35af1d19b0c65c9ab04eb743c7e01c5442d882eb34e","src/codegen/llvm.rs":"d1299c189abb17a6133f047574cffc7a6db4c1be37cb7d4785491cb5
e8f8cf54","src/codegen/math.rs":"35f96e37a78fcf0cdb02146b7f27a45108fe06a37fc2a54d8851ce131a326178","src/codegen/math/float.rs":"dd86c0449e576c83b719700962ac017c332987fac08d91f2b7a2b1b883598170","src/codegen/math/float/abs.rs":"f56e2b4b8055ea861c1f5cbc6b6e1d8e7e5af163b62c13574ddee4e09513bfbc","src/codegen/math/float/cos.rs":"ef3b511a24d23045b310315e80348a9b7fedb576fc2de52d74290616a0abeb2a","src/codegen/math/float/cos_pi.rs":"4e7631a5d73dac21531e09ef1802d1180f8997509c2c8fa9f67f322194263a97","src/codegen/math/float/exp.rs":"61b691598c41b5622f24e4320c1bdd08701e612a516438bdddcc728fc3405c8c","src/codegen/math/float/ln.rs":"46b718b1ba8c9d99e1ad40f53d20dfde08a3063ca7bd2a9fdd6698e060da687e","src/codegen/math/float/macros.rs":"dd42135fff13f9aca4fd3a1a4e14c7e6c31aadc6d817d63b0d2fb9e62e062744","src/codegen/math/float/mul_add.rs":"a37bf764345d4b1714f97e83897b7cf0855fc2811704bcbc0012db91825339e1","src/codegen/math/float/mul_adde.rs":"c75702bfcb361de45964a93caf959a695ef2376bd069227600b8c6872665c755","src/codegen/math/float/powf.rs":"642346e982bc4c39203de0864d2149c4179cd7b21cf67a2951687932b4675872","src/codegen/math/float/sin.rs":"9d68164c90cdca6a85155040cdac42e27342ebe0b925273ef1593df721af4258","src/codegen/math/float/sin_cos_pi.rs":"9be02ad48585a1e8d99129382fbffbaed47852f15459256a708850b6b7a75405","src/codegen/math/float/sin_pi.rs":"9890347905b4d4a3c7341c3eb06406e46e60582bcf6960688bd727e5dadc6c57","src/codegen/math/float/sqrt.rs":"e3c60dcfb0c6d2fc62adabcc931b2d4040b83cab294dea36443fb4b89eb79e34","src/codegen/math/float/sqrte.rs":"f0f4ef9eb475ae41bcc7ec6a95ad744ba6b36925faa8b2c2814004396d196b63","src/codegen/pointer_sized_int.rs":"a70697169c28218b56fd2e8d5353f2e00671d1150d0c8cef77d613bdfacd84cb","src/codegen/reductions.rs":"645e2514746d01387ddd07f0aa4ffd8430cc9ab428d4fb13773ea319fa25dd95","src/codegen/reductions/mask.rs":"8f1afe6aabf096a3278e1fc3a30f736e04aa8b9ce96373cee22162d18cfe2702","src/codegen/reductions/mask/aarch64.rs":"cba6e17603d39795dcfe8339b6b7d8714c3e162a1f0a635979f03
7aa24fe4206","src/codegen/reductions/mask/arm.rs":"9447904818aa2c7c25d0963eead452a639a11ca7dbd6d21eedbfcaade07a0f33","src/codegen/reductions/mask/fallback.rs":"7a0ef9f7fd03ae318b495b95e121350cd61caffc5cc6ee17fabf130d5d933453","src/codegen/reductions/mask/fallback_impl.rs":"76547f396e55ef403327c77c314cf8db8c7a5c9b9819bfb925abeacf130249e5","src/codegen/reductions/mask/x86.rs":"14bd2c482071f2355beebcf7b7ecf950ff2dfcdb08c3ca50993092434a9de717","src/codegen/reductions/mask/x86/avx.rs":"b4913d87844c522903641cbbf10db4551addb1ce5e9e78278e21612fa65c733b","src/codegen/reductions/mask/x86/avx2.rs":"677aed3f056285285daa3adff8bc65e739630b4424defa6d9665e160f027507e","src/codegen/reductions/mask/x86/sse.rs":"226610b4ff88c676d5187114dd57b4a8800de6ce40884675e9198445b1ed0306","src/codegen/reductions/mask/x86/sse2.rs":"bc38e6c31cb4b3d62147eba6cac264e519e2a48e0f7ce9010cfa9ef0cf0ec9fd","src/codegen/shuffle.rs":"0abca97e92cdce49a58a39cc447eb09dc7d7715ef256c8dbd2181a186e61bb64","src/codegen/shuffle1_dyn.rs":"04523e9338133bdedb012dd076c2c564b79ce5593b0fc56d0fb6910e04190a81","src/codegen/swap_bytes.rs":"1d6cdc716eadddc92b4fd506b2445a821caa8dc00860447de09d7ebd69c2087f","src/codegen/v128.rs":"94226b31ec403d18d9d2fe06713f147c9c79e9b5f9105089088266313f843185","src/codegen/v16.rs":"ddec4ffb66b6f7aaffb9a1780c5ddba82557abd74f45073d335047e04cf74924","src/codegen/v256.rs":"6b63917f0444118d6b1595bff2045e59b97c4d24012bd575f69f1f0efc5a0241","src/codegen/v32.rs":"3477b3c5540aed86e61e2f5807dd31db947413cec9181c587d93ed6ec74f0eba","src/codegen/v512.rs":"5854f99d3aabc4cd42b28a20d9ce447756dc2ba024a409a69b6a8ae1f1842fc5","src/codegen/v64.rs":"e9e89caebfe63d10c0cbca61e4dfdba3b7e02ee0989170f80beed23237ddd950","src/codegen/vPtr.rs":"96d609a9eece4dcbbcc01ba0b8744d7f5958be12774176a2945bc676f4e6b5cb","src/codegen/vSize.rs":"eeee9858749aa82142b27bc120d1989bb74a6b82e1e4efbbeaccc9634dc9acfc","src/lib.rs":"1b5d419ff05ee0370d671810423ccc254708cc8d415c1dbac2a7a36be4bf63a8","src/masks.rs":"870f429967b2d7d5133f4d28d6c753fc
5cef0570b27b29d4e966a066d22d2d0e","src/sealed.rs":"ff7f0324276408ae8249941cfa32c90b8835a54d750896b683efea857af19db2","src/testing.rs":"1d3a7862ef625e235a5734ad7204e68d350f902c0695182b1f08a0552432416e","src/testing/macros.rs":"6378856d7a40ba5ec5c7c0dad6327d79f0c77266921c24296d10aed6c68e9b98","src/testing/utils.rs":"d6fd5a5017f1f85d9d99585754f8f6ad06fc3d683b34083543e67a7cc6c1772c","src/v128.rs":"18fe263c4aa28cd06461c7070b0269f69f4a2e75749b8f142a83dfdfe4d22bf5","src/v16.rs":"e5c663c9fb3547eaeac78a5f7db9969f4d8b5ec96112bf2954602fff11f0aebd","src/v256.rs":"68732cd688ad12a56d8b4f8ddf279f77bdfe1be2943c7dc0c1b4f1a76798aa0f","src/v32.rs":"785b22a1ccb4a41bb53dfeb0670f624c0ce42e6cdf62d1747e3283777a1c70bd","src/v512.rs":"d1337bfe07f06a8f37f8e8fa7d4315b9307476ee435ad80dd5269eaed564fbfa","src/v64.rs":"3077468d65125b8f085e9454c8b2463a4d5225697464ba6a1300f8799528fd4b","src/vPtr.rs":"c9a53f41f466e17b6648a4ce390fd8f4d3a848d440eb8a9a803a11608d76eb05","src/vSize.rs":"5c46d3e8c3ee5863d9b6e37e681f871386e0efc254d6d84ba711edb529ce7b3c","tests/endianness.rs":"541a144be017e3dd7da7c8ea49d907dc02538245e8c5f3deb5bd43da92c929e1"},"package":null} -\ No newline at end of file -diff --git a/third_party/rust/packed_simd/.travis.yml b/third_party/rust/packed_simd/.travis.yml -new file mode 100644 -index 000000000000..8d8ed54ab737 ---- /dev/null -+++ b/third_party/rust/packed_simd/.travis.yml -@@ -0,0 +1,308 @@ -+language: rust -+sudo: false -+rust: nightly -+ -+stages: -+ - tools -+ - linux-tier1 -+ - osx-tier1 -+ - osx-tier2 -+ - linux-tier2 -+ - android -+ -+matrix: -+ fast_finish: true -+ include: -+ # Android: -+ - env: TARGET=x86_64-linux-android NOVERIFY=1 -+ name: "x86_64-unknown-linux-android + SSE2" -+ stage: android -+ - env: TARGET=arm-linux-androideabi -+ name: "arm-linux-androideabi" -+ stage: android -+ - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon" -+ name: "arm-linux-androideabi + NEON" -+ stage: android -+ - env: TARGET=aarch64-linux-android -+ name: 
"aarch64-unknown-linux-android" -+ stage: android -+ - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon" -+ name: "aarch64-unknown-linux-android + NEON" -+ stage: android -+ - env: TARGET="thumbv7neon-linux-androideabi" -+ name: "thumbv7neon-linux-androideabi" -+ stage: android -+ # Linux: -+ - env: TARGET=i586-unknown-linux-gnu -+ name: "i586-unknown-linux-gnu" -+ stage: linux-tier2 -+ - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse" -+ name: "i586-unknown-linux-gnu + SSE" -+ stage: linux-tier2 -+ - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse2" -+ name: "i586-unknown-linux-gnu + SSE2" -+ stage: linux-tier2 -+ - env: TARGET=i686-unknown-linux-gnu -+ name: "i686-unknown-linux-gnu + SSE2" -+ stage: linux-tier1 -+ - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2" -+ name: "i686-unknown-linux-gnu + SSE4.2" -+ stage: linux-tier1 -+ - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2" -+ name: "i686-unknown-linux-gnu + AVX2" -+ stage: linux-tier1 -+ - env: TARGET=x86_64-unknown-linux-gnu -+ name: "x86_64-unknown-linux-gnu + SSE2" -+ install: rustup component add rustfmt-preview -+ stage: linux-tier1 -+ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2" -+ name: "x86_64-unknown-linux-gnu + SSE4.2" -+ install: rustup component add rustfmt-preview -+ stage: linux-tier1 -+ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx" -+ name: "x86_64-unknown-linux-gnu + AVX" -+ install: rustup component add rustfmt-preview -+ stage: linux-tier1 -+ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2" -+ name: "x86_64-unknown-linux-gnu + AVX2" -+ install: rustup component add rustfmt-preview -+ stage: linux-tier1 -+ - env: TARGET=x86_64-unknown-linux-gnu-emulated -+ name: "Intel SDE + SSE2" -+ install: true -+ stage: linux-tier1 -+ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C 
target-feature=+sse4.2" -+ name: "Intel SDE + SSE4.2" -+ install: true -+ stage: linux-tier1 -+ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx" -+ name: "Intel SDE + AVX" -+ install: true -+ stage: linux-tier1 -+ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx2" -+ name: "Intel SDE + AVX2" -+ install: true -+ stage: linux-tier1 -+ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx-512f" -+ name: "Intel SDE + AVX-512" -+ install: true -+ stage: linux-tier1 -+ - env: TARGET=arm-unknown-linux-gnueabi -+ name: "arm-unknown-linux-gnueabi" -+ stage: linux-tier2 -+ - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon" -+ name: "arm-unknown-linux-gnueabi + NEON" -+ stage: linux-tier2 -+ - env: TARGET=arm-unknown-linux-gnueabihf -+ name: "arm-unknown-linux-gnueabihf" -+ stage: linux-tier2 -+ - env: TARGET=arm-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+v7,+neon" -+ name: "arm-unknown-linux-gnueabihf + NEON" -+ stage: linux-tier2 -+ - env: TARGET=armv7-unknown-linux-gnueabihf -+ name: "armv7-unknown-linux-gnueabihf" -+ stage: linux-tier2 -+ - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon" -+ name: "armv7-unknown-linux-gnueabihf + NEON" -+ stage: linux-tier2 -+ - env: TARGET="thumbv7neon-unknown-linux-gnueabihf" -+ name: "thumbv7neon-unknown-linux-gnueabihf" -+ stage: linux-tier2 -+ - env: TARGET=aarch64-unknown-linux-gnu -+ name: "aarch64-unknown-linux-gnu" -+ stage: linux-tier2 -+ - env: TARGET=aarch64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+neon" -+ name: "aarch64-unknown-linux-gnu + NEON" -+ stage: linux-tier2 -+ - env: TARGET=mips-unknown-linux-gnu -+ name: "mips-unknown-linux-gnu" -+ stage: linux-tier2 -+ - env: TARGET=mipsel-unknown-linux-musl -+ name: "mipsel-unknown-linux-musl" -+ stage: linux-tier2 -+ - env: TARGET=mips64-unknown-linux-gnuabi64 -+ name: "mips64-unknown-linux-gnuabi64" -+ stage: 
linux-tier2 -+ - env: TARGET=mips64el-unknown-linux-gnuabi64 -+ name: "mips64el-unknown-linux-gnuabi64" -+ stage: linux-tier2 -+ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/18 -+ # env: TARGET=mips64el-unknown-linux-gnuabi64 RUSTFLAGS="-C target-feature=+msa -C target-cpu=mips64r6" -+ - env: TARGET=powerpc-unknown-linux-gnu -+ name: "powerpc-unknown-linux-gnu" -+ stage: linux-tier2 -+ - env: TARGET=powerpc64-unknown-linux-gnu -+ name: "powerpc64-unknown-linux-gnu" -+ stage: linux-tier2 -+ - env: TARGET=powerpc64le-unknown-linux-gnu -+ name: "powerpc64le-unknown-linux-gnu" -+ stage: linux-tier2 -+ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec" -+ name: "powerpc64le-unknown-linux-gnu + ALTIVEC" -+ stage: linux-tier2 -+ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx" -+ name: "powerpc64le-unknown-linux-gnu + VSX" -+ stage: linux-tier2 -+ - env: TARGET=s390x-unknown-linux-gnu -+ name: "s390x-unknown-linux-gnu" -+ stage: linux-tier2 -+ - env: TARGET=sparc64-unknown-linux-gnu -+ name: "sparc64-unknown-linux-gnu" -+ stage: linux-tier2 -+ # WebAssembly: -+ - env: TARGET=wasm32-unknown-unknown -+ name: "wasm32-unknown-unknown" -+ stage: osx-tier1 # For now -+ # MacOSX: -+ - os: osx -+ env: TARGET=i686-apple-darwin -+ name: "i686-apple-darwin + SSE2" -+ script: ci/run.sh -+ osx_image: xcode10 -+ stage: osx-tier1 -+ - os: osx -+ env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2" -+ name: "i686-apple-darwin + SSE4.2" -+ script: ci/run.sh -+ osx_image: xcode10 -+ stage: osx-tier1 -+ # Travis-CI OSX build bots do not support AVX2: -+ - os: osx -+ env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+avx" -+ name: "i686-apple-darwin + AVX" -+ script: ci/run.sh -+ osx_image: xcode10 -+ stage: osx-tier1 -+ - os: osx -+ env: TARGET=x86_64-apple-darwin -+ name: "x86_64-apple-darwin + SSE2" -+ install: true -+ script: ci/run.sh -+ osx_image: xcode10 -+ stage: osx-tier1 -+ - 
os: osx -+ env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2" -+ name: "x86_64-apple-darwin + SSE4.2" -+ install: true -+ script: ci/run.sh -+ osx_image: xcode10 -+ stage: osx-tier1 -+ # Travis-CI OSX build bots do not support AVX2: -+ - os: osx -+ env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+avx" -+ name: "x86_64-apple-darwin + AVX" -+ install: true -+ script: ci/run.sh -+ osx_image: xcode10 -+ stage: osx-tier1 -+ # *BSDs: -+ #- env: TARGET=i686-unknown-freebsd NORUN=1 -+ # script: ci/run.sh -+ #- env: TARGET=x86_64-unknown-freebsd NORUN=1 -+ # script: ci/run.sh -+ #- env: TARGET=x86_64-unknown-netbsd NORUN=1 -+ # script: ci/run.sh -+ # Solaris: -+ #- env: TARGET=x86_64-sun-solaris NORUN=1 -+ # script: ci/run.sh -+ # iOS: -+ - os: osx -+ env: TARGET=i386-apple-ios -+ name: "i386-apple-ios" -+ script: ci/run.sh -+ osx_image: xcode9.4 -+ stage: osx-tier2 -+ - os: osx -+ env: TARGET=x86_64-apple-ios -+ name: "x86_64-apple-ios + SSE2" -+ script: ci/run.sh -+ osx_image: xcode9.4 -+ stage: osx-tier2 -+ - os: osx -+ env: TARGET=armv7-apple-ios NORUN=1 -+ name: "armv7-apple-ios [Build only]" -+ script: ci/run.sh -+ osx_image: xcode9.4 -+ stage: osx-tier2 -+ - os: osx -+ env: TARGET=aarch64-apple-ios NORUN=1 -+ name: "aarch64-apple-ios [Build only]" -+ script: ci/run.sh -+ osx_image: xcode9.4 -+ stage: osx-tier2 -+ # BENCHMARKS: -+ - name: "Benchmarks - x86_64-unknown-linux-gnu" -+ install: TARGET=x86_64-unknown-linux-gnu ./ci/setup_benchmarks.sh -+ script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh -+ stage: tools -+ - name: "Benchmarks - x86_64-apple-darwin" -+ install: TARGET=x86_64-apple-darwin ./ci/setup_benchmarks.sh -+ script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh -+ os: osx -+ osx_image: xcode9.4 -+ stage: tools -+ # TOOLS: -+ - name: "Documentation" -+ install: cargo install mdbook -+ script: ci/dox.sh -+ stage: tools -+ - name: "rustfmt" -+ 
install: true -+ before_script: rustup component add rustfmt-preview -+ script: ci/all.sh check_fmt || true -+ stage: tools -+ - name: "clippy" -+ install: true -+ before_script: rustup component add clippy-preview -+ script: ci/all.sh clippy -+ stage: tools -+ -+ allow_failures: -+ # FIXME: ISPC cannot be found? -+ - name: "Benchmarks - x86_64-apple-darwin" -+ # FIXME: TBD -+ - env: TARGET=powerpc-unknown-linux-gnu -+ - env: TARGET=powerpc64-unknown-linux-gnu -+ - env: TARGET=powerpc64le-unknown-linux-gnu -+ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec" -+ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx" -+ #- env: TARGET=i686-unknown-freebsd NORUN=1 -+ #- env: TARGET=x86_64-unknown-freebsd NORUN=1 -+ #- env: TARGET=x86_64-unknown-netbsd NORUN=1 -+ #- env: TARGET=x86_64-sun-solaris NORUN=1 -+ -+ # FIXME: TBD -+ - env: TARGET=arm-linux-androideabi -+ - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon" -+ - env: TARGET=aarch64-linux-android -+ - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon" -+ -+ # FIXME: iOS -+ # https://github.com/rust-lang-nursery/packed_simd/issues/26 -+ - env: TARGET=i386-apple-ios -+ - env: TARGET=x86_64-apple-ios -+ -+ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/182 -+ - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon" -+ - env: TARGET=arm-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+v7,+neon" -+ - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon" -+ -+ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/183 -+ - env: TARGET=wasm32-unknown-unknown -+ -+install: travis_retry rustup target add $TARGET -+before_script: cargo generate-lockfile -+script: travis_wait 50 ci/run-docker.sh -+after_script: sleep 5 -+ -+env: -+ global: -+ secure: 
"lPHv7s6+AxQYNaFncycVFQt++Y1asQmMhOikQU1ztlP8CK7+hn2m98cg/euOJyzIOb2iJ3ZX4cGZkzw4lc59MQBByb1GtDbazQoUOzVDbVfe9BDD2f8JVoIFh1CMfjPKQ7Gg/rJqWlwrUlSd5GNxPCutKjY7qZhJuR6SQbJjlWaGN2Vd4fVCzKXz8fHRXgMEZS+d+CR4Nsrkb83J3Z4s5kSdJmhYxJ61AWjuzJVwUh4l3/HEYlSL5XXpuh5R2i7W16h1PlNdaTUgkZli1lHzO8+6Q8LzX9+XiLIEVX9lw3A2NdIKGz8E/+7Qs5oYOkwYhjROsDQxIK7xkSM30bQuN7cwMBybAVIyOPJkqXQ1dQyp83KSdsOj7JMyDDRvcEDLI6ehRlm5EcdH7YrReuboN81iUo0Sa7VsuUmgj5hjERCt9r30f9aWuitABai7vKRtjglg7Sp5CrEVPA4PQs6PqKCCRogoggbXJ/Z5Dyw/RZaXPeNR9+qIKN1Vjm9Gew1sRN2JK/3+vXTKtyJXH/uBxgJt4jQlbuShOJuF+BSfTF88sMe67a/357SSOIb4JkaCyd0flDCWYE8576kaHPlVVMT2peXee0LeRXm1e13nG3Na0t3LS/orJLPHOShNQGoDj7qAP5aEKggRya896JGwtvlaBHHTmSQh65G7cyNErZo=" -+branches: -+ only: -+ - staging # bors r+ -+ - trying # bors try -+ - master -+notifications: -+ email: -+ on_success: never -diff --git a/third_party/rust/packed_simd/Cargo.toml b/third_party/rust/packed_simd/Cargo.toml -new file mode 100644 -index 000000000000..3db9354c9407 ---- /dev/null -+++ b/third_party/rust/packed_simd/Cargo.toml -@@ -0,0 +1,42 @@ -+[package] -+name = "packed_simd" -+version = "0.3.3" -+authors = ["Gonzalo Brito Gadeschi "] -+description = "Portable Packed SIMD vectors" -+documentation = "https://docs.rs/crate/packed_simd/" -+homepage = "https://github.com/rust-lang-nursery/packed_simd" -+repository = "https://github.com/rust-lang-nursery/packed_simd" -+keywords = ["simd", "vector", "portability"] -+categories = ["hardware-support", "concurrency", "no-std", "data-structures"] -+license = "MIT/Apache-2.0" -+build = "build.rs" -+edition = "2018" -+ -+[badges] -+appveyor = { repository = "rust-lang-nursery/packed_simd" } -+travis-ci = { repository = "rust-lang-nursery/packed_simd" } -+codecov = { repository = "rust-lang-nursery/packed_simd" } -+is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/packed_simd" } -+is-it-maintained-open-issues = { repository = "rust-lang-nursery/packed_simd" } -+maintenance = { status = "experimental" } -+ 
-+[dependencies] -+cfg-if = "^0.1.6" -+core_arch = { version = "^0.1.3", optional = true } -+ -+[features] -+default = [] -+into_bits = [] -+libcore_neon = [] -+ -+[dev-dependencies] -+paste = "^0.1.3" -+arrayvec = { version = "^0.4", default-features = false } -+ -+[target.'cfg(target_arch = "x86_64")'.dependencies.sleef-sys] -+version = "^0.1.2" -+optional = true -+ -+[target.wasm32-unknown-unknown.dev-dependencies] -+wasm-bindgen = "=0.2.19" -+wasm-bindgen-test = "=0.2.19" -\ No newline at end of file -diff --git a/third_party/rust/simd/LICENSE-APACHE b/third_party/rust/packed_simd/LICENSE-APACHE -similarity index 100% -rename from third_party/rust/simd/LICENSE-APACHE -rename to third_party/rust/packed_simd/LICENSE-APACHE -diff --git a/third_party/rust/simd/LICENSE-MIT b/third_party/rust/packed_simd/LICENSE-MIT -similarity index 93% -rename from third_party/rust/simd/LICENSE-MIT -rename to third_party/rust/packed_simd/LICENSE-MIT -index bf6c304f7774..39d4bdb5acd3 100644 ---- a/third_party/rust/simd/LICENSE-MIT -+++ b/third_party/rust/packed_simd/LICENSE-MIT -@@ -1,25 +1,25 @@ --Copyright (c) 2014 Huon Wilson -+Copyright (c) 2014 The Rust Project Developers - - Permission is hereby granted, free of charge, to any - person obtaining a copy of this software and associated - documentation files (the "Software"), to deal in the - Software without restriction, including without - limitation the rights to use, copy, modify, merge, - publish, distribute, sublicense, and/or sell copies of - the Software, and to permit persons to whom the Software - is furnished to do so, subject to the following - conditions: - - The above copyright notice and this permission notice - shall be included in all copies or substantial portions - of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF - ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED - TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A - PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT - SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR - IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER --DEALINGS IN THE SOFTWARE. -\ No newline at end of file -+DEALINGS IN THE SOFTWARE. -diff --git a/third_party/rust/packed_simd/bors.toml b/third_party/rust/packed_simd/bors.toml -new file mode 100644 -index 000000000000..6d302dc85cf6 ---- /dev/null -+++ b/third_party/rust/packed_simd/bors.toml -@@ -0,0 +1,3 @@ -+status = [ -+ "continuous-integration/travis-ci/push" -+] -\ No newline at end of file -diff --git a/third_party/rust/packed_simd/build.rs b/third_party/rust/packed_simd/build.rs -new file mode 100644 -index 000000000000..85639ff9d085 ---- /dev/null -+++ b/third_party/rust/packed_simd/build.rs -@@ -0,0 +1,8 @@ -+fn main() { -+ println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1"); -+ let target = std::env::var("TARGET") -+ .expect("TARGET environment variable not defined"); -+ if target.contains("neon") { -+ println!("cargo:rustc-cfg=libcore_neon"); -+ } -+} -diff --git a/third_party/rust/packed_simd/ci/all.sh b/third_party/rust/packed_simd/ci/all.sh -new file mode 100644 -index 000000000000..273562d4a9bb ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/all.sh -@@ -0,0 +1,71 @@ -+#!/usr/bin/env bash -+# -+# Performs an operation on all targets -+ -+set -ex -+ -+: "${1?The all.sh script requires one argument.}" -+ -+op=$1 -+ -+cargo_clean() { -+ cargo clean -+} -+ -+cargo_check_fmt() { -+ cargo fmt --all -- --check -+} -+ -+cargo_fmt() { -+ cargo fmt --all -+} -+ -+cargo_clippy() { -+ cargo clippy --all -- -D clippy::pedantic -+} -+ -+CMD="-1" -+ -+case $op in -+ clean*) -+ CMD=cargo_clean -+ ;; -+ check_fmt*) -+ CMD=cargo_check_fmt -+ ;; -+ fmt*) -+ CMD=cargo_fmt -+ ;; -+ clippy) -+ CMD=cargo_clippy -+ ;; -+ *) -+ echo "Unknown operation: \"${op}\"" -+ exit 1 -+ ;; -+esac -+ -+echo "Operation is: ${CMD}" -+ -+# On src/ 
-+$CMD -+ -+# Check examples/ -+for dir in examples/*/ -+do -+ dir=${dir%*/} -+ ( -+ cd "${dir%*/}" -+ $CMD -+ ) -+done -+ -+( -+ cd verify/verify -+ $CMD -+) -+ -+( -+ cd micro_benchmarks -+ $CMD -+) -diff --git a/third_party/rust/packed_simd/ci/android-install-ndk.sh b/third_party/rust/packed_simd/ci/android-install-ndk.sh -new file mode 100644 -index 000000000000..818e78446ae8 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/android-install-ndk.sh -@@ -0,0 +1,37 @@ -+#!/usr/bin/env sh -+# Copyright 2016 The Rust Project Developers. See the COPYRIGHT -+# file at the top-level directory of this distribution and at -+# http://rust-lang.org/COPYRIGHT. -+# -+# Licensed under the Apache License, Version 2.0 or the MIT license -+# , at your -+# option. This file may not be copied, modified, or distributed -+# except according to those terms. -+ -+set -ex -+ -+curl --retry 5 -O https://dl.google.com/android/repository/android-ndk-r15b-linux-x86_64.zip -+unzip -q android-ndk-r15b-linux-x86_64.zip -+ -+case "$1" in -+ aarch64) -+ arch=arm64 -+ ;; -+ -+ i686) -+ arch=x86 -+ ;; -+ -+ *) -+ arch=$1 -+ ;; -+esac; -+ -+android-ndk-r15b/build/tools/make_standalone_toolchain.py \ -+ --unified-headers \ -+ --install-dir "/android/ndk-${1}" \ -+ --arch "${arch}" \ -+ --api 24 -+ -+rm -rf ./android-ndk-r15b-linux-x86_64.zip ./android-ndk-r15b -diff --git a/third_party/rust/packed_simd/ci/android-install-sdk.sh b/third_party/rust/packed_simd/ci/android-install-sdk.sh -new file mode 100644 -index 000000000000..6b5ac09ab04a ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/android-install-sdk.sh -@@ -0,0 +1,60 @@ -+#!/usr/bin/env sh -+# Copyright 2016 The Rust Project Developers. See the COPYRIGHT -+# file at the top-level directory of this distribution and at -+# http://rust-lang.org/COPYRIGHT. -+# -+# Licensed under the Apache License, Version 2.0 or the MIT license -+# , at your -+# option. 
This file may not be copied, modified, or distributed -+# except according to those terms. -+ -+set -ex -+ -+# Prep the SDK and emulator -+# -+# Note that the update process requires that we accept a bunch of licenses, and -+# we can't just pipe `yes` into it for some reason, so we take the same strategy -+# located in https://github.com/appunite/docker by just wrapping it in a script -+# which apparently magically accepts the licenses. -+ -+mkdir sdk -+curl --retry 5 https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip -O -+unzip -d sdk sdk-tools-linux-3859397.zip -+ -+case "$1" in -+ arm | armv7) -+ abi=armeabi-v7a -+ ;; -+ -+ aarch64) -+ abi=arm64-v8a -+ ;; -+ -+ i686) -+ abi=x86 -+ ;; -+ -+ x86_64) -+ abi=x86_64 -+ ;; -+ -+ *) -+ echo "invalid arch: $1" -+ exit 1 -+ ;; -+esac; -+ -+# --no_https avoids -+ # javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: No trusted certificate found -+yes | ./sdk/tools/bin/sdkmanager --licenses --no_https -+yes | ./sdk/tools/bin/sdkmanager --no_https \ -+ "emulator" \ -+ "platform-tools" \ -+ "platforms;android-24" \ -+ "system-images;android-24;default;$abi" -+ -+echo "no" | -+ ./sdk/tools/bin/avdmanager create avd \ -+ --name "${1}" \ -+ --package "system-images;android-24;default;$abi" -diff --git a/third_party/rust/packed_simd/ci/android-sysimage.sh b/third_party/rust/packed_simd/ci/android-sysimage.sh -new file mode 100644 -index 000000000000..9eabd7c8d94f ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/android-sysimage.sh -@@ -0,0 +1,56 @@ -+#!/usr/bin/env bash -+ -+# Copyright 2017 The Rust Project Developers. See the COPYRIGHT -+# file at the top-level directory of this distribution and at -+# http://rust-lang.org/COPYRIGHT. -+# -+# Licensed under the Apache License, Version 2.0 or the MIT license -+# , at your -+# option. This file may not be copied, modified, or distributed -+# except according to those terms. 
-+ -+set -ex -+ -+URL=https://dl.google.com/android/repository/sys-img/android -+ -+main() { -+ local arch="${1}" -+ local name="${2}" -+ local dest=/system -+ local td -+ td="$(mktemp -d)" -+ -+ apt-get install --no-install-recommends e2tools -+ -+ pushd "${td}" -+ curl --retry 5 -O "${URL}/${name}" -+ unzip -q "${name}" -+ -+ local system -+ system="$(find . -name system.img)" -+ mkdir -p ${dest}/{bin,lib,lib64} -+ -+ # Extract android linker and libraries to /system -+ # This allows android executables to be run directly (or with qemu) -+ if [ "${arch}" = "x86_64" ] || [ "${arch}" = "arm64" ]; then -+ e2cp -p "${system}:/bin/linker64" "${dest}/bin/" -+ e2cp -p "${system}:/lib64/libdl.so" "${dest}/lib64/" -+ e2cp -p "${system}:/lib64/libc.so" "${dest}/lib64/" -+ e2cp -p "${system}:/lib64/libm.so" "${dest}/lib64/" -+ else -+ e2cp -p "${system}:/bin/linker" "${dest}/bin/" -+ e2cp -p "${system}:/lib/libdl.so" "${dest}/lib/" -+ e2cp -p "${system}:/lib/libc.so" "${dest}/lib/" -+ e2cp -p "${system}:/lib/libm.so" "${dest}/lib/" -+ fi -+ -+ # clean up -+ apt-get purge --auto-remove -y e2tools -+ -+ popd -+ -+ rm -rf "${td}" -+} -+ -+main "${@}" -diff --git a/third_party/rust/packed_simd/ci/benchmark.sh b/third_party/rust/packed_simd/ci/benchmark.sh -new file mode 100644 -index 000000000000..3635b9e371d1 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/benchmark.sh -@@ -0,0 +1,32 @@ -+#!/usr/bin/env bash -+# -+# Runs all benchmarks. Controlled by the following environment variables: -+# -+# FEATURES={} - cargo features to pass to all benchmarks (e.g. core_arch,sleef-sys,ispc) -+# NORUN={1} - only builds the benchmarks -+ -+set -ex -+ -+if [[ ${NORUN} != 1 ]]; then -+ # Most benchmarks require hyperfine; require it upfront. -+ hash hyperfine 2>/dev/null || { echo >&2 "hyperfine is not in PATH."; exit 1; } -+fi -+ -+ -+# If the ispc benchmark feature is enabled, ispc must be in the path of the -+# benchmarks. 
-+if echo "$FEATURES" | grep -q "ispc"; then -+ hash ispc 2>/dev/null || { echo >&2 "ispc is not in PATH."; exit 1; } -+fi -+ -+# An example with a benchmark.sh is a benchmark: -+for dir in examples/*/ -+do -+ dir=${dir%*/} -+ cd ${dir%*/} -+ if [ -f "benchmark.sh" ]; then -+ ./benchmark.sh -+ fi -+ cd - -+done -+ -diff --git a/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs b/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs -new file mode 100644 -index 000000000000..c0fe52c35659 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs -@@ -0,0 +1,176 @@ -+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT -+// file at the top-level directory of this distribution and at -+// http://rust-lang.org/COPYRIGHT. -+// -+// Licensed under the Apache License, Version 2.0 or the MIT license -+// , at your -+// option. This file may not be copied, modified, or distributed -+// except according to those terms. -+ -+// This is a script to deploy and execute a binary on an iOS simulator. -+// The primary use of this is to be able to run unit tests on the simulator and -+// retrieve the results. -+// -+// To do this through Cargo instead, use Dinghy -+// (https://github.com/snipsco/dinghy): cargo dinghy install, then cargo dinghy -+// test. -+ -+use std::env; -+use std::fs::{self, File}; -+use std::io::Write; -+use std::path::Path; -+use std::process; -+use std::process::Command; -+ -+macro_rules! 
t { -+ ($e:expr) => (match $e { -+ Ok(e) => e, -+ Err(e) => panic!("{} failed with: {}", stringify!($e), e), -+ }) -+} -+ -+// Step one: Wrap as an app -+fn package_as_simulator_app(crate_name: &str, test_binary_path: &Path) { -+ println!("Packaging simulator app"); -+ drop(fs::remove_dir_all("ios_simulator_app")); -+ t!(fs::create_dir("ios_simulator_app")); -+ t!(fs::copy(test_binary_path, -+ Path::new("ios_simulator_app").join(crate_name))); -+ -+ let mut f = t!(File::create("ios_simulator_app/Info.plist")); -+ t!(f.write_all(format!(r#" -+ -+ -+ -+ -+ CFBundleExecutable -+ {} -+ CFBundleIdentifier -+ com.rust.unittests -+ -+ -+ "#, crate_name).as_bytes())); -+} -+ -+// Step two: Start the iOS simulator -+fn start_simulator() { -+ println!("Looking for iOS simulator"); -+ let output = t!(Command::new("xcrun").arg("simctl").arg("list").output()); -+ assert!(output.status.success()); -+ let mut simulator_exists = false; -+ let mut simulator_booted = false; -+ let mut found_rust_sim = false; -+ let stdout = t!(String::from_utf8(output.stdout)); -+ for line in stdout.lines() { -+ if line.contains("rust_ios") { -+ if found_rust_sim { -+ panic!("Duplicate rust_ios simulators found. 
Please \ -+ double-check xcrun simctl list."); -+ } -+ simulator_exists = true; -+ simulator_booted = line.contains("(Booted)"); -+ found_rust_sim = true; -+ } -+ } -+ -+ if simulator_exists == false { -+ println!("Creating iOS simulator"); -+ Command::new("xcrun") -+ .arg("simctl") -+ .arg("create") -+ .arg("rust_ios") -+ .arg("com.apple.CoreSimulator.SimDeviceType.iPhone-SE") -+ .arg("com.apple.CoreSimulator.SimRuntime.iOS-10-2") -+ .check_status(); -+ } else if simulator_booted == true { -+ println!("Shutting down already-booted simulator"); -+ Command::new("xcrun") -+ .arg("simctl") -+ .arg("shutdown") -+ .arg("rust_ios") -+ .check_status(); -+ } -+ -+ println!("Starting iOS simulator"); -+ // We can't uninstall the app (if present) as that will hang if the -+ // simulator isn't completely booted; just erase the simulator instead. -+ Command::new("xcrun").arg("simctl").arg("erase").arg("rust_ios").check_status(); -+ Command::new("xcrun").arg("simctl").arg("boot").arg("rust_ios").check_status(); -+} -+ -+// Step three: Install the app -+fn install_app_to_simulator() { -+ println!("Installing app to simulator"); -+ Command::new("xcrun") -+ .arg("simctl") -+ .arg("install") -+ .arg("booted") -+ .arg("ios_simulator_app/") -+ .check_status(); -+} -+ -+// Step four: Run the app -+fn run_app_on_simulator() { -+ println!("Running app"); -+ let output = t!(Command::new("xcrun") -+ .arg("simctl") -+ .arg("launch") -+ .arg("--console") -+ .arg("booted") -+ .arg("com.rust.unittests") -+ .output()); -+ -+ println!("stdout --\n{}\n", String::from_utf8_lossy(&output.stdout)); -+ println!("stderr --\n{}\n", String::from_utf8_lossy(&output.stderr)); -+ -+ let stdout = String::from_utf8_lossy(&output.stdout); -+ let failed = stdout.lines() -+ .find(|l| l.contains("FAILED")) -+ .map(|l| l.contains("FAILED")) -+ .unwrap_or(false); -+ -+ let passed = stdout.lines() -+ .find(|l| l.contains("test result: ok")) -+ .map(|l| l.contains("test result: ok")) -+ .unwrap_or(false); -+ -+ 
println!("Shutting down simulator"); -+ Command::new("xcrun") -+ .arg("simctl") -+ .arg("shutdown") -+ .arg("rust_ios") -+ .check_status(); -+ if !(passed && !failed) { -+ panic!("tests didn't pass"); -+ } -+} -+ -+trait CheckStatus { -+ fn check_status(&mut self); -+} -+ -+impl CheckStatus for Command { -+ fn check_status(&mut self) { -+ println!("\trunning: {:?}", self); -+ assert!(t!(self.status()).success()); -+ } -+} -+ -+fn main() { -+ let args: Vec = env::args().collect(); -+ if args.len() != 2 { -+ println!("Usage: {} ", args[0]); -+ process::exit(-1); -+ } -+ -+ let test_binary_path = Path::new(&args[1]); -+ let crate_name = test_binary_path.file_name().unwrap(); -+ -+ package_as_simulator_app(crate_name.to_str().unwrap(), test_binary_path); -+ start_simulator(); -+ install_app_to_simulator(); -+ run_app_on_simulator(); -+} -diff --git a/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile b/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile -new file mode 100644 -index 000000000000..27bde89c5a8d ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile -@@ -0,0 +1,47 @@ -+FROM ubuntu:16.04 -+ -+RUN dpkg --add-architecture i386 && \ -+ apt-get update && \ -+ apt-get install -y --no-install-recommends \ -+ file \ -+ make \ -+ curl \ -+ ca-certificates \ -+ python \ -+ unzip \ -+ expect \ -+ openjdk-9-jre \ -+ libstdc++6:i386 \ -+ libpulse0 \ -+ gcc \ -+ libc6-dev -+ -+WORKDIR /android/ -+COPY android* /android/ -+ -+ENV ANDROID_ARCH=aarch64 -+ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools -+ -+RUN sh /android/android-install-ndk.sh $ANDROID_ARCH -+RUN sh /android/android-install-sdk.sh $ANDROID_ARCH -+RUN mv /root/.android /tmp -+RUN chmod 777 -R /tmp/.android -+RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/* -+ -+ENV PATH=$PATH:/rust/bin \ -+ 
CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER=aarch64-linux-android-gcc \ -+ CARGO_TARGET_AARCH64_LINUX_ANDROID_RUNNER=/tmp/runtest \ -+ OBJDUMP=aarch64-linux-android-objdump \ -+ HOME=/tmp -+ -+ADD runtest-android.rs /tmp/runtest.rs -+ENTRYPOINT [ \ -+ "bash", \ -+ "-c", \ -+ # set SHELL so android can detect a 64bits system, see -+ # http://stackoverflow.com/a/41789144 -+ "SHELL=/bin/dash /android/sdk/emulator/emulator @aarch64 -no-window & \ -+ rustc /tmp/runtest.rs -o /tmp/runtest && \ -+ exec \"$@\"", \ -+ "--" \ -+] -diff --git a/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile -new file mode 100644 -index 000000000000..68261a2f033d ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile -@@ -0,0 +1,14 @@ -+FROM ubuntu:17.10 -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc \ -+ ca-certificates \ -+ libc6-dev \ -+ gcc-aarch64-linux-gnu \ -+ libc6-dev-arm64-cross \ -+ qemu-user \ -+ make \ -+ file -+ -+ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ -+ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \ -+ OBJDUMP=aarch64-linux-gnu-objdump -diff --git a/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile -new file mode 100644 -index 000000000000..995a9e30e65e ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile -@@ -0,0 +1,47 @@ -+FROM ubuntu:16.04 -+ -+RUN dpkg --add-architecture i386 && \ -+ apt-get update && \ -+ apt-get install -y --no-install-recommends \ -+ file \ -+ make \ -+ curl \ -+ ca-certificates \ -+ python \ -+ unzip \ -+ expect \ -+ openjdk-9-jre \ -+ libstdc++6:i386 \ -+ libpulse0 \ -+ gcc \ -+ libc6-dev -+ -+WORKDIR /android/ -+COPY android* /android/ -+ -+ENV ANDROID_ARCH=arm -+ENV 
PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools -+ -+RUN sh /android/android-install-ndk.sh $ANDROID_ARCH -+RUN sh /android/android-install-sdk.sh $ANDROID_ARCH -+RUN mv /root/.android /tmp -+RUN chmod 777 -R /tmp/.android -+RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/* -+ -+ENV PATH=$PATH:/rust/bin \ -+ CARGO_TARGET_ARM_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \ -+ CARGO_TARGET_ARM_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \ -+ OBJDUMP=arm-linux-androideabi-objdump \ -+ HOME=/tmp -+ -+ADD runtest-android.rs /tmp/runtest.rs -+ENTRYPOINT [ \ -+ "bash", \ -+ "-c", \ -+ # set SHELL so android can detect a 64bits system, see -+ # http://stackoverflow.com/a/41789144 -+ "SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \ -+ rustc /tmp/runtest.rs -o /tmp/runtest && \ -+ exec \"$@\"", \ -+ "--" \ -+] -diff --git a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile -new file mode 100644 -index 000000000000..cb4de6a57eaa ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile -@@ -0,0 +1,15 @@ -+FROM ubuntu:17.10 -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc \ -+ ca-certificates \ -+ libc6-dev \ -+ libc6-armel-cross \ -+ libc6-dev-armel-cross \ -+ binutils-arm-linux-gnueabi \ -+ gcc-arm-linux-gnueabi \ -+ qemu-user \ -+ make \ -+ file -+ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc \ -+ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER="qemu-arm -L /usr/arm-linux-gnueabi" \ -+ OBJDUMP=arm-linux-gnueabi-objdump -diff --git a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile -new file mode 100644 -index 000000000000..c7bd61f0a796 ---- /dev/null -+++ 
b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile -@@ -0,0 +1,13 @@ -+FROM ubuntu:17.10 -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc \ -+ ca-certificates \ -+ libc6-dev \ -+ gcc-arm-linux-gnueabihf \ -+ libc6-dev-armhf-cross \ -+ qemu-user \ -+ make \ -+ file -+ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ -+ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \ -+ OBJDUMP=arm-linux-gnueabihf-objdump -diff --git a/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile -new file mode 100644 -index 000000000000..e01b87afdf56 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile -@@ -0,0 +1,13 @@ -+FROM ubuntu:17.10 -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc \ -+ ca-certificates \ -+ libc6-dev \ -+ gcc-arm-linux-gnueabihf \ -+ libc6-dev-armhf-cross \ -+ qemu-user \ -+ make \ -+ file -+ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ -+ CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \ -+ OBJDUMP=arm-linux-gnueabihf-objdump -diff --git a/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile -new file mode 100644 -index 000000000000..857974a858f1 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile -@@ -0,0 +1,7 @@ -+FROM ubuntu:17.10 -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc-multilib \ -+ libc6-dev \ -+ file \ -+ make \ -+ ca-certificates -diff --git a/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile -new file mode 100644 
-index 000000000000..857974a858f1 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile -@@ -0,0 +1,7 @@ -+FROM ubuntu:17.10 -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc-multilib \ -+ libc6-dev \ -+ file \ -+ make \ -+ ca-certificates -diff --git a/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile -new file mode 100644 -index 000000000000..4711cead372a ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile -@@ -0,0 +1,13 @@ -+FROM ubuntu:17.10 -+ -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc libc6-dev qemu-user ca-certificates \ -+ gcc-mips-linux-gnu libc6-dev-mips-cross \ -+ qemu-system-mips \ -+ qemu-user \ -+ make \ -+ file -+ -+ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \ -+ CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \ -+ OBJDUMP=mips-linux-gnu-objdump -\ No newline at end of file -diff --git a/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile -new file mode 100644 -index 000000000000..1422e8c80924 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile -@@ -0,0 +1,10 @@ -+FROM ubuntu:17.10 -+ -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc libc6-dev qemu-user ca-certificates \ -+ gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \ -+ qemu-system-mips64 qemu-user -+ -+ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \ -+ CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \ -+ OBJDUMP=mips64-linux-gnuabi64-objdump -\ No newline at end of file -diff --git 
a/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile -new file mode 100644 -index 000000000000..d94deb5b2013 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile -@@ -0,0 +1,10 @@ -+FROM ubuntu:17.10 -+ -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc libc6-dev qemu-user ca-certificates \ -+ gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross \ -+ qemu-system-mips64el -+ -+ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \ -+ CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64" \ -+ OBJDUMP=mips64el-linux-gnuabi64-objdump -\ No newline at end of file -diff --git a/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile b/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile -new file mode 100644 -index 000000000000..40ac50675bd9 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile -@@ -0,0 +1,25 @@ -+FROM ubuntu:18.10 -+ -+RUN apt-get update && \ -+ apt-get install -y --no-install-recommends \ -+ ca-certificates \ -+ gcc \ -+ libc6-dev \ -+ make \ -+ qemu-user \ -+ qemu-system-mips \ -+ bzip2 \ -+ curl \ -+ file -+ -+RUN mkdir /toolchain -+ -+# Note that this originally came from: -+# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 -+RUN curl -L https://s3-us-west-1.amazonaws.com/rust-lang-ci2/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \ -+ tar xjf - -C /toolchain --strip-components=2 -+ -+ENV PATH=$PATH:/rust/bin:/toolchain/bin \ -+ CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \ -+ CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \ -+ 
CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain" -\ No newline at end of file -diff --git a/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile -new file mode 100644 -index 000000000000..43b174ed87fc ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile -@@ -0,0 +1,12 @@ -+FROM ubuntu:17.10 -+ -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc libc6-dev qemu-user ca-certificates \ -+ gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \ -+ qemu-system-ppc \ -+ make \ -+ file -+ -+ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \ -+ CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu Vger -L /usr/powerpc-linux-gnu" \ -+ OBJDUMP=powerpc-linux-gnu-objdump -diff --git a/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile -new file mode 100644 -index 000000000000..7757ad28a42d ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile -@@ -0,0 +1,17 @@ -+FROM ubuntu:17.10 -+ -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc \ -+ ca-certificates \ -+ libc6-dev \ -+ gcc-powerpc64-linux-gnu \ -+ libc6-dev-ppc64-cross \ -+ qemu-user \ -+ qemu-system-ppc \ -+ make \ -+ file -+ -+ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \ -+ CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -L /usr/powerpc64-linux-gnu" \ -+ CC=powerpc64-linux-gnu-gcc \ -+ OBJDUMP=powerpc64-linux-gnu-objdump -diff --git a/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile -new file mode 100644 -index 000000000000..0b0c214fdf1b ---- /dev/null -+++ 
b/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile -@@ -0,0 +1,11 @@ -+FROM ubuntu:17.10 -+ -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc libc6-dev qemu-user ca-certificates \ -+ gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \ -+ qemu-system-ppc file make -+ -+ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \ -+ CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -L /usr/powerpc64le-linux-gnu" \ -+ CC=powerpc64le-linux-gnu-gcc \ -+ OBJDUMP=powerpc64le-linux-gnu-objdump -diff --git a/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile -new file mode 100644 -index 000000000000..c645b0bcc2b8 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile -@@ -0,0 +1,20 @@ -+FROM ubuntu:18.10 -+ -+RUN apt-get update && \ -+ apt-get install -y --no-install-recommends \ -+ ca-certificates \ -+ curl \ -+ cmake \ -+ gcc \ -+ libc6-dev \ -+ g++-s390x-linux-gnu \ -+ libc6-dev-s390x-cross \ -+ qemu-user \ -+ make \ -+ file -+ -+ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc \ -+ CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -L /usr/s390x-linux-gnu" \ -+ CC_s390x_unknown_linux_gnu=s390x-linux-gnu-gcc \ -+ CXX_s390x_unknown_linux_gnu=s390x-linux-gnu-g++ \ -+ OBJDUMP=s390x-linux-gnu-objdump -\ No newline at end of file -diff --git a/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile -new file mode 100644 -index 000000000000..fe12af14da6f ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile -@@ -0,0 +1,18 @@ -+FROM debian:stretch -+ -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ curl ca-certificates \ -+ gcc libc6-dev \ -+ gcc-sparc64-linux-gnu 
libc6-dev-sparc64-cross \ -+ qemu-system-sparc64 openbios-sparc seabios ipxe-qemu \ -+ p7zip-full cpio -+ -+COPY linux-sparc64.sh / -+RUN bash /linux-sparc64.sh -+ -+COPY test-runner-linux / -+ -+ENV CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_LINKER=sparc64-linux-gnu-gcc \ -+ CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_RUNNER="/test-runner-linux sparc64" \ -+ CC_sparc64_unknown_linux_gnu=sparc64-linux-gnu-gcc \ -+ PATH=$PATH:/rust/bin -\ No newline at end of file -diff --git a/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile -new file mode 100644 -index 000000000000..c1da77109c12 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile -@@ -0,0 +1,47 @@ -+FROM ubuntu:16.04 -+ -+RUN dpkg --add-architecture i386 && \ -+ apt-get update && \ -+ apt-get install -y --no-install-recommends \ -+ file \ -+ make \ -+ curl \ -+ ca-certificates \ -+ python \ -+ unzip \ -+ expect \ -+ openjdk-9-jre \ -+ libstdc++6:i386 \ -+ libpulse0 \ -+ gcc \ -+ libc6-dev -+ -+WORKDIR /android/ -+COPY android* /android/ -+ -+ENV ANDROID_ARCH=arm -+ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools -+ -+RUN sh /android/android-install-ndk.sh $ANDROID_ARCH -+RUN sh /android/android-install-sdk.sh $ANDROID_ARCH -+RUN mv /root/.android /tmp -+RUN chmod 777 -R /tmp/.android -+RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/* -+ -+ENV PATH=$PATH:/rust/bin \ -+ CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \ -+ CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \ -+ OBJDUMP=arm-linux-androideabi-objdump \ -+ HOME=/tmp -+ -+ADD runtest-android.rs /tmp/runtest.rs -+ENTRYPOINT [ \ -+ "bash", \ -+ "-c", \ -+ # set SHELL so android can detect a 64bits system, see -+ # http://stackoverflow.com/a/41789144 -+ "SHELL=/bin/dash 
/android/sdk/emulator/emulator @arm -no-window & \ -+ rustc /tmp/runtest.rs -o /tmp/runtest && \ -+ exec \"$@\"", \ -+ "--" \ -+] -diff --git a/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile -new file mode 100644 -index 000000000000..696cb6c3fb52 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile -@@ -0,0 +1,13 @@ -+FROM ubuntu:17.10 -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc \ -+ ca-certificates \ -+ libc6-dev \ -+ gcc-arm-linux-gnueabihf \ -+ libc6-dev-armhf-cross \ -+ qemu-user \ -+ make \ -+ file -+ENV CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ -+ CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \ -+ OBJDUMP=arm-linux-gnueabihf-objdump -diff --git a/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile b/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile -new file mode 100644 -index 000000000000..f905cf1a36eb ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile -@@ -0,0 +1,37 @@ -+FROM ubuntu:18.04 -+ -+RUN apt-get update -y && apt-get install -y --no-install-recommends \ -+ ca-certificates \ -+ clang \ -+ cmake \ -+ curl \ -+ git \ -+ libc6-dev \ -+ make \ -+ python \ -+ xz-utils -+ -+# Install `wasm2wat` -+RUN git clone --recursive https://github.com/WebAssembly/wabt -+RUN make -C wabt -j$(nproc) -+ENV PATH=$PATH:/wabt/bin -+ -+# Install `wasm-bindgen-test-runner` -+RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.19/wasm-bindgen-0.2.19-x86_64-unknown-linux-musl.tar.gz \ -+ | tar xzf - -+ENV PATH=$PATH:/wasm-bindgen-0.2.19-x86_64-unknown-linux-musl -+ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner -+ -+# Install `node` -+RUN curl 
https://nodejs.org/dist/v10.8.0/node-v10.8.0-linux-x64.tar.xz | tar xJf - -+ENV PATH=$PATH:/node-v10.8.0-linux-x64/bin -+ -+# We use a shim linker that removes `--strip-debug` when passed to LLD. While -+# this typically results in invalid debug information in release mode it doesn't -+# result in an invalid names section which is what we're interested in. -+COPY lld-shim.rs / -+ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_LINKER=/tmp/lld-shim -+ -+# Rustc isn't available until this container starts, so defer compilation of the -+# shim. -+ENTRYPOINT /rust/bin/rustc /lld-shim.rs -o /tmp/lld-shim && exec bash "$@" -diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile -new file mode 100644 -index 000000000000..d52dd45b12bf ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile -@@ -0,0 +1,29 @@ -+FROM ubuntu:16.04 -+ -+RUN apt-get update && \ -+ apt-get install -y --no-install-recommends \ -+ ca-certificates \ -+ curl \ -+ gcc \ -+ libc-dev \ -+ python \ -+ unzip \ -+ file \ -+ make -+ -+WORKDIR /android/ -+ENV ANDROID_ARCH=x86_64 -+COPY android-install-ndk.sh /android/ -+RUN sh /android/android-install-ndk.sh $ANDROID_ARCH -+ -+# We do not run x86_64-linux-android tests on an android emulator. -+# See ci/android-sysimage.sh for informations about how tests are run. 
-+COPY android-sysimage.sh /android/ -+RUN bash /android/android-sysimage.sh x86_64 x86_64-24_r07.zip -+ -+ENV PATH=$PATH:/rust/bin:/android/ndk-$ANDROID_ARCH/bin \ -+ CARGO_TARGET_X86_64_LINUX_ANDROID_LINKER=x86_64-linux-android-gcc \ -+ CC_x86_64_linux_android=x86_64-linux-android-gcc \ -+ CXX_x86_64_linux_android=x86_64-linux-android-g++ \ -+ OBJDUMP=x86_64-linux-android-objdump \ -+ HOME=/tmp -diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile -new file mode 100644 -index 000000000000..a6bbe6653928 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile -@@ -0,0 +1,16 @@ -+FROM ubuntu:18.04 -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc \ -+ libc6-dev \ -+ file \ -+ make \ -+ ca-certificates \ -+ wget \ -+ bzip2 \ -+ cmake \ -+ libclang-dev \ -+ clang -+ -+RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.16.0-2018-01-30-lin.tar.bz2 -+RUN tar -xjf sde-external-8.16.0-2018-01-30-lin.tar.bz2 -+ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.16.0-2018-01-30-lin/sde64 --" -diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile -new file mode 100644 -index 000000000000..e6b000d0516e ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile -@@ -0,0 +1,10 @@ -+FROM ubuntu:17.10 -+RUN apt-get update && apt-get install -y --no-install-recommends \ -+ gcc \ -+ libc6-dev \ -+ file \ -+ make \ -+ ca-certificates \ -+ cmake \ -+ libclang-dev \ -+ clang -diff --git a/third_party/rust/packed_simd/ci/dox.sh b/third_party/rust/packed_simd/ci/dox.sh -new file mode 100644 -index 000000000000..1743366407e3 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/dox.sh -@@ -0,0 +1,24 @@ 
-+#!/bin/sh -+ -+set -ex -+ -+rm -rf target/doc -+mkdir -p target/doc -+ -+# Build API documentation -+cargo doc --features=into_bits -+ -+# Build Performance Guide -+# FIXME: https://github.com/rust-lang-nursery/mdBook/issues/780 -+# mdbook build perf-guide -d target/doc/perf-guide -+cd perf-guide -+mdbook build -+cd - -+cp -r perf-guide/book target/doc/perf-guide -+ -+# If we're on travis, not a PR, and on the right branch, publish! -+if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$TRAVIS_BRANCH" = "master" ]; then -+ pip install ghp_import --install-option="--prefix=$HOME/.local" -+ $HOME/.local/bin/ghp-import -n target/doc -+ git push -qf https://${GH_PAGES}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages -+fi -diff --git a/third_party/rust/packed_simd/ci/linux-s390x.sh b/third_party/rust/packed_simd/ci/linux-s390x.sh -new file mode 100644 -index 000000000000..972abeec569e ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/linux-s390x.sh -@@ -0,0 +1,18 @@ -+set -ex -+ -+mkdir -m 777 /qemu -+cd /qemu -+ -+curl -LO https://github.com/qemu/qemu/raw/master/pc-bios/s390-ccw.img -+curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/kernel.debian -+curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/initrd.debian -+ -+mv kernel.debian kernel -+mv initrd.debian initrd.gz -+ -+mkdir init -+cd init -+gunzip -c ../initrd.gz | cpio -id -+rm ../initrd.gz -+cp /usr/s390x-linux-gnu/lib/libgcc_s.so.1 usr/lib/ -+chmod a+w . 
-diff --git a/third_party/rust/packed_simd/ci/linux-sparc64.sh b/third_party/rust/packed_simd/ci/linux-sparc64.sh -new file mode 100644 -index 000000000000..4452b120e1b6 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/linux-sparc64.sh -@@ -0,0 +1,17 @@ -+set -ex -+ -+mkdir -m 777 /qemu -+cd /qemu -+ -+curl -LO https://cdimage.debian.org/cdimage/ports/9.0/sparc64/iso-cd/debian-9.0-sparc64-NETINST-1.iso -+7z e debian-9.0-sparc64-NETINST-1.iso boot/initrd.gz -+7z e debian-9.0-sparc64-NETINST-1.iso boot/sparc64 -+mv sparc64 kernel -+rm debian-9.0-sparc64-NETINST-1.iso -+ -+mkdir init -+cd init -+gunzip -c ../initrd.gz | cpio -id -+rm ../initrd.gz -+cp /usr/sparc64-linux-gnu/lib/libgcc_s.so.1 usr/lib/ -+chmod a+w . -diff --git a/third_party/rust/packed_simd/ci/lld-shim.rs b/third_party/rust/packed_simd/ci/lld-shim.rs -new file mode 100644 -index 000000000000..10263869e8dc ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/lld-shim.rs -@@ -0,0 +1,11 @@ -+use std::os::unix::prelude::*; -+use std::process::Command; -+use std::env; -+ -+fn main() { -+ let args = env::args() -+ .skip(1) -+ .filter(|s| s != "--strip-debug") -+ .collect::>(); -+ panic!("failed to exec: {}", Command::new("rust-lld").args(&args).exec()); -+} -diff --git a/third_party/rust/packed_simd/ci/max_line_width.sh b/third_party/rust/packed_simd/ci/max_line_width.sh -new file mode 100644 -index 000000000000..f70639b6f89b ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/max_line_width.sh -@@ -0,0 +1,17 @@ -+#!/usr/bin/env sh -+ -+set -x -+ -+export success=true -+ -+find . 
-iname '*.rs' | while read -r file; do -+ result=$(grep '.\{79\}' "${file}" | grep --invert 'http') -+ if [ "${result}" = "" ] -+ then -+ : -+ else -+ echo "file \"${file}\": $result" -+ exit 1 -+ fi -+done -+ -diff --git a/third_party/rust/packed_simd/ci/run-docker.sh b/third_party/rust/packed_simd/ci/run-docker.sh -new file mode 100644 -index 000000000000..abdd6852fc3a ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/run-docker.sh -@@ -0,0 +1,38 @@ -+# Small script to run tests for a target (or all targets) inside all the -+# respective docker images. -+ -+set -ex -+ -+run() { -+ echo "Building docker container for TARGET=${TARGET} RUSTFLAGS=${RUSTFLAGS}" -+ docker build -t packed_simd -f ci/docker/${TARGET}/Dockerfile ci/ -+ mkdir -p target -+ target=$(echo "${TARGET}" | sed 's/-emulated//') -+ echo "Running docker" -+ docker run \ -+ --user `id -u`:`id -g` \ -+ --rm \ -+ --init \ -+ --volume $HOME/.cargo:/cargo \ -+ --env CARGO_HOME=/cargo \ -+ --volume `rustc --print sysroot`:/rust:ro \ -+ --env TARGET=$target \ -+ --env NORUN \ -+ --env NOVERIFY \ -+ --env RUSTFLAGS \ -+ --volume `pwd`:/checkout:ro \ -+ --volume `pwd`/target:/checkout/target \ -+ --workdir /checkout \ -+ --privileged \ -+ packed_simd \ -+ bash \ -+ -c 'PATH=$PATH:/rust/bin exec ci/run.sh' -+} -+ -+if [ -z "${TARGET}" ]; then -+ for d in `ls ci/docker/`; do -+ run $d -+ done -+else -+ run ${TARGET} -+fi -diff --git a/third_party/rust/packed_simd/ci/run.sh b/third_party/rust/packed_simd/ci/run.sh -new file mode 100644 -index 000000000000..7bb825883680 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/run.sh -@@ -0,0 +1,96 @@ -+#!/usr/bin/env bash -+ -+set -ex -+ -+: ${TARGET?"The TARGET environment variable must be set."} -+ -+# Tests are all super fast anyway, and they fault often enough on travis that -+# having only one thread increases debuggability to be worth it. 
-+#export RUST_TEST_THREADS=1 -+#export RUST_BACKTRACE=full -+#export RUST_TEST_NOCAPTURE=1 -+ -+# Some appveyor builds run out-of-memory; this attempts to mitigate that: -+# https://github.com/rust-lang-nursery/packed_simd/issues/39 -+# export RUSTFLAGS="${RUSTFLAGS} -C codegen-units=1" -+# export CARGO_BUILD_JOBS=1 -+ -+export CARGO_SUBCMD=test -+if [[ "${NORUN}" == "1" ]]; then -+ export CARGO_SUBCMD=build -+fi -+ -+if [[ ${TARGET} == "x86_64-apple-ios" ]] || [[ ${TARGET} == "i386-apple-ios" ]]; then -+ export RUSTFLAGS="${RUSTFLAGS} -Clink-arg=-mios-simulator-version-min=7.0" -+ rustc ./ci/deploy_and_run_on_ios_simulator.rs -o $HOME/runtest -+ export CARGO_TARGET_X86_64_APPLE_IOS_RUNNER=$HOME/runtest -+ export CARGO_TARGET_I386_APPLE_IOS_RUNNER=$HOME/runtest -+fi -+ -+# The source directory is read-only. Need to copy internal crates to the target -+# directory for their Cargo.lock to be properly written. -+mkdir target || true -+ -+rustc --version -+cargo --version -+echo "TARGET=${TARGET}" -+echo "HOST=${HOST}" -+echo "RUSTFLAGS=${RUSTFLAGS}" -+echo "NORUN=${NORUN}" -+echo "NOVERIFY=${NOVERIFY}" -+echo "CARGO_SUBCMD=${CARGO_SUBCMD}" -+echo "CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS}" -+echo "CARGO_INCREMENTAL=${CARGO_INCREMENTAL}" -+echo "RUST_TEST_THREADS=${RUST_TEST_THREADS}" -+echo "RUST_BACKTRACE=${RUST_BACKTRACE}" -+echo "RUST_TEST_NOCAPTURE=${RUST_TEST_NOCAPTURE}" -+ -+cargo_test() { -+ cmd="cargo ${CARGO_SUBCMD} --verbose --target=${TARGET} ${@}" -+ if [ "${NORUN}" != "1" ] -+ then -+ if [ "$TARGET" != "wasm32-unknown-unknown" ] -+ then -+ cmd="$cmd -- --quiet" -+ fi -+ fi -+ mkdir target || true -+ ${cmd} 2>&1 | tee > target/output -+ if [[ ${PIPESTATUS[0]} != 0 ]]; then -+ cat target/output -+ return 1 -+ fi -+} -+ -+cargo_test_impl() { -+ ORIGINAL_RUSTFLAGS=${RUSTFLAGS} -+ RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v16 --cfg test_v32 --cfg test_v64" cargo_test ${@} -+ RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v128 --cfg test_v256" cargo_test ${@} -+ 
RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v512" cargo_test ${@} -+ RUSTFLAGS=${ORIGINAL_RUSTFLAGS} -+} -+ -+# Debug run: -+if [[ "${TARGET}" != "wasm32-unknown-unknown" ]]; then -+ # Run wasm32-unknown-unknown in release mode only -+ cargo_test_impl -+fi -+ -+if [[ "${TARGET}" == "x86_64-unknown-linux-gnu" ]] || [[ "${TARGET}" == "x86_64-pc-windows-msvc" ]]; then -+ # use sleef on linux and windows x86_64 builds -+ cargo_test_impl --release --features=into_bits,core_arch,sleef-sys -+else -+ cargo_test_impl --release --features=into_bits,core_arch -+fi -+ -+# Verify code generation -+if [[ "${NOVERIFY}" != "1" ]]; then -+ cp -r verify/verify target/verify -+ export STDSIMD_ASSERT_INSTR_LIMIT=30 -+ if [[ "${TARGET}" == "i586-unknown-linux-gnu" ]]; then -+ export STDSIMD_ASSERT_INSTR_LIMIT=50 -+ fi -+ cargo_test --release --manifest-path=target/verify/Cargo.toml -+fi -+ -+. ci/run_examples.sh -diff --git a/third_party/rust/packed_simd/ci/run_examples.sh b/third_party/rust/packed_simd/ci/run_examples.sh -new file mode 100644 -index 000000000000..5b26b18afb20 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/run_examples.sh -@@ -0,0 +1,51 @@ -+# Runs all examples. -+ -+# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/55 -+# All examples fail to build for `armv7-apple-ios`. 
-+if [[ ${TARGET} == "armv7-apple-ios" ]]; then -+ exit 0 -+fi -+ -+# FIXME: travis exceeds 50 minutes on these targets -+# Skipping the examples is an attempt at preventing travis from timing-out -+if [[ ${TARGET} == "arm-linux-androidabi" ]] || [[ ${TARGET} == "aarch64-linux-androidabi" ]] \ -+ || [[ ${TARGET} == "sparc64-unknown-linux-gnu" ]]; then -+ exit 0 -+fi -+ -+if [[ ${TARGET} == "wasm32-unknown-unknown" ]]; then -+ exit 0 -+fi -+ -+cp -r examples/aobench target/aobench -+cargo_test --manifest-path=target/aobench/Cargo.toml --release --no-default-features -+cargo_test --manifest-path=target/aobench/Cargo.toml --release --features=256bit -+ -+cp -r examples/dot_product target/dot_product -+cargo_test --manifest-path=target/dot_product/Cargo.toml --release -+ -+cp -r examples/fannkuch_redux target/fannkuch_redux -+cargo_test --manifest-path=target/fannkuch_redux/Cargo.toml --release -+ -+# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/56 -+if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then -+ cp -r examples/mandelbrot target/mandelbrot -+ cargo_test --manifest-path=target/mandelbrot/Cargo.toml --release -+fi -+ -+cp -r examples/matrix_inverse target/matrix_inverse -+cargo_test --manifest-path=target/matrix_inverse/Cargo.toml --release -+ -+cp -r examples/nbody target/nbody -+cargo_test --manifest-path=target/nbody/Cargo.toml --release -+ -+cp -r examples/spectral_norm target/spectral_norm -+cargo_test --manifest-path=target/spectral_norm/Cargo.toml --release -+ -+if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then -+ cp -r examples/stencil target/stencil -+ cargo_test --manifest-path=target/stencil/Cargo.toml --release -+fi -+ -+cp -r examples/triangle_xform target/triangle_xform -+cargo_test --manifest-path=target/triangle_xform/Cargo.toml --release -diff --git a/third_party/rust/packed_simd/ci/runtest-android.rs b/third_party/rust/packed_simd/ci/runtest-android.rs -new file mode 100644 -index 000000000000..ed1cd80c834a ---- /dev/null 
-+++ b/third_party/rust/packed_simd/ci/runtest-android.rs -@@ -0,0 +1,45 @@ -+use std::env; -+use std::process::Command; -+use std::path::{Path, PathBuf}; -+ -+fn main() { -+ let args = env::args_os() -+ .skip(1) -+ .filter(|arg| arg != "--quiet") -+ .collect::>(); -+ assert_eq!(args.len(), 1); -+ let test = PathBuf::from(&args[0]); -+ let dst = Path::new("/data/local/tmp").join(test.file_name().unwrap()); -+ -+ let status = Command::new("adb") -+ .arg("wait-for-device") -+ .status() -+ .expect("failed to run: adb wait-for-device"); -+ assert!(status.success()); -+ -+ let status = Command::new("adb") -+ .arg("push") -+ .arg(&test) -+ .arg(&dst) -+ .status() -+ .expect("failed to run: adb pushr"); -+ assert!(status.success()); -+ -+ let output = Command::new("adb") -+ .arg("shell") -+ .arg(&dst) -+ .output() -+ .expect("failed to run: adb shell"); -+ assert!(status.success()); -+ -+ println!("status: {}\nstdout ---\n{}\nstderr ---\n{}", -+ output.status, -+ String::from_utf8_lossy(&output.stdout), -+ String::from_utf8_lossy(&output.stderr)); -+ -+ let stdout = String::from_utf8_lossy(&output.stdout); -+ let mut lines = stdout.lines().filter(|l| l.starts_with("test result")); -+ if !lines.all(|l| l.contains("test result: ok") && l.contains("0 failed")) { -+ panic!("failed to find successful test run"); -+ } -+} -diff --git a/third_party/rust/packed_simd/ci/setup_benchmarks.sh b/third_party/rust/packed_simd/ci/setup_benchmarks.sh -new file mode 100644 -index 000000000000..ddc4765d5ceb ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/setup_benchmarks.sh -@@ -0,0 +1,10 @@ -+#!/usr/bin/env bash -+ -+set -ex -+ -+# Get latest ISPC binary for the target and put it in the path -+git clone https://github.com/gnzlbg/ispc-binaries -+cp ispc-binaries/ispc-${TARGET} ispc -+ -+# Rust-bindgen requires RUSTFMT -+rustup component add rustfmt-preview -diff --git a/third_party/rust/packed_simd/ci/test-runner-linux b/third_party/rust/packed_simd/ci/test-runner-linux -new file mode 
100644 -index 000000000000..0654f63bfdb9 ---- /dev/null -+++ b/third_party/rust/packed_simd/ci/test-runner-linux -@@ -0,0 +1,24 @@ -+#!/bin/sh -+ -+set -e -+ -+arch=$1 -+prog=$2 -+ -+cd /qemu/init -+cp -f $2 prog -+find . | cpio --create --format='newc' --quiet | gzip > ../initrd.gz -+cd .. -+ -+timeout 30s qemu-system-$arch \ -+ -m 1024 \ -+ -nographic \ -+ -kernel kernel \ -+ -initrd initrd.gz \ -+ -append init=/prog > output || true -+ -+# remove kernel messages -+tr -d '\r' < output | egrep -v '^\[' -+ -+# if the output contains a failure, return error -+! grep FAILED output > /dev/null -diff --git a/third_party/rust/packed_simd/contributing.md b/third_party/rust/packed_simd/contributing.md -new file mode 100644 -index 000000000000..93fa92783740 ---- /dev/null -+++ b/third_party/rust/packed_simd/contributing.md -@@ -0,0 +1,67 @@ -+# Contributing to `packed_simd` -+ -+Welcome! If you are reading this document, it means you are interested in contributing -+to the `packed_simd` crate. -+ -+## Reporting issues -+ -+All issues with this crate are tracked using GitHub's [Issue Tracker]. -+ -+You can use issues to bring bugs to the attention of the maintainers, to discuss -+certain problems encountered with the crate, or to request new features (although -+feature requests should be limited to things mentioned in the [RFC]). -+ -+One thing to keep in mind is to always use the **latest** nightly toolchain when -+working on this crate. Due to the nature of this project, we use a lot of unstable -+features, meaning breakage happens often. -+ -+[Issue Tracker]: https://github.com/rust-lang-nursery/packed_simd/issues -+[RFC]: https://github.com/rust-lang/rfcs/pull/2366 -+ -+### LLVM issues -+ -+The Rust compiler relies on [LLVM](https://llvm.org/) for machine code generation, -+and quite a few LLVM bugs have been discovered during the development of this project. 
-+ -+If you encounter issues with incorrect/suboptimal codegen, which you do not encounter -+when using the [SIMD vendor intrinsics](https://doc.rust-lang.org/nightly/std/arch/), -+it is likely the issue is with LLVM, or this crate's interaction with it. -+ -+You should first open an issue **in this repo** to help us track the problem, and we -+will help determine what is the exact cause of the problem. -+If LLVM is indeed the cause, the issue will be reported upstream to the -+[LLVM bugtracker](https://bugs.llvm.org/). -+ -+## Submitting Pull Requests -+ -+New code is submitted to the crate using GitHub's [pull request] mechanism. -+You should first fork this repository, make your changes (preferrably in a new -+branch), then use GitHub's web UI to create a new PR. -+ -+[pull request]: https://help.github.com/articles/about-pull-requests/ -+ -+### Examples -+ -+The `examples` directory contains code showcasing SIMD code written with this crate, -+usually in comparison to scalar or ISPC code. If you have a project / idea which -+uses SIMD, we'd love to add it to the examples list. -+ -+Every example should include a small `README`, describing the example code's purpose. -+If your example could potentially work as a benchmark, then add a `benchmark.sh` -+script to allow running the example benchmark code in CI. See an existing example's -+[`benchmark.sh`](examples/aobench/benchmark.sh) for a sample. -+ -+Don't forget to update the crate's top-level `README` with a link to your example. -+ -+### Perf guide -+ -+The objective of the [performance guide][perf-guide] is to be a comprehensive -+resource detailing the process of optimizing Rust code with SIMD support. -+ -+If you believe a certain section could be reworded, or if you have any tips & tricks -+related to SIMD which you'd like to share, please open a PR. -+ -+[mdBook] is used to manage the formatting of the guide as a book. 
-+ -+[perf-guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/ -+[mdBook]: https://github.com/rust-lang-nursery/mdBook -diff --git a/third_party/rust/packed_simd/perf-guide/.gitignore b/third_party/rust/packed_simd/perf-guide/.gitignore -new file mode 100644 -index 000000000000..5a0bf0317d75 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/.gitignore -@@ -0,0 +1 @@ -+/book -diff --git a/third_party/rust/packed_simd/perf-guide/book.toml b/third_party/rust/packed_simd/perf-guide/book.toml -new file mode 100644 -index 000000000000..69ba3053ca25 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/book.toml -@@ -0,0 +1,12 @@ -+[book] -+authors = ["Gonzalo Brito Gadeschi", "Gabriel Majeri"] -+multilingual = false -+src = "src" -+title = "Rust SIMD Performance Guide" -+description = "This book describes how to write performant SIMD code in Rust." -+ -+[build] -+create-missing = false -+ -+[output.html] -+additional-css = ["./src/ascii.css"] -diff --git a/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md b/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md -new file mode 100644 -index 000000000000..1e76898865c5 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md -@@ -0,0 +1,21 @@ -+# Summary -+ -+[Introduction](./introduction.md) -+ -+- [Floating-point Math](./float-math/fp.md) -+ - [Short-vector Math Library](./float-math/svml.md) -+ - [Approximate functions](./float-math/approx.md) -+ - [Fused multiply-accumulate](./float-math/fma.md) -+ -+- [Target features](./target-feature/features.md) -+ - [Using `RUSTFLAGS`](./target-feature/rustflags.md) -+ - [Using the `target_feature` attribute](./target-feature/attribute.md) -+ - [Interaction with inlining](./target-feature/inlining.md) -+ - [Detecting features at runtime](./target-feature/runtime.md) -+ -+- [Bounds checking](./bound_checks.md) -+- [Vertical and horizontal operations](./vert-hor-ops.md) -+ -+- [Performance profiling](./prof/profiling.md) -+ 
- [Profiling on Linux](./prof/linux.md) -+ - [Using machine code analyzers](./prof/mca.md) -diff --git a/third_party/rust/packed_simd/perf-guide/src/ascii.css b/third_party/rust/packed_simd/perf-guide/src/ascii.css -new file mode 100644 -index 000000000000..4c02651195f9 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/ascii.css -@@ -0,0 +1,4 @@ -+code { -+ /* "Source Code Pro" breaks ASCII art */ -+ font-family: Consolas, "Ubuntu Mono", Menlo, "DejaVu Sans Mono", monospace; -+} -diff --git a/third_party/rust/packed_simd/perf-guide/src/bound_checks.md b/third_party/rust/packed_simd/perf-guide/src/bound_checks.md -new file mode 100644 -index 000000000000..2eeedb5ac829 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/bound_checks.md -@@ -0,0 +1,22 @@ -+# Bounds checking -+ -+Reading and writing packed vectors to/from slices is checked by default. -+Independently of the configuration options used, the safe functions: -+ -+* `Simd<[T; N]>::from_slice_aligned(& s[..])` -+* `Simd<[T; N]>::write_to_slice_aligned(&mut s[..])` -+ -+always check that: -+ -+* the slice is big enough to hold the vector -+* the slice is suitably aligned to perform an aligned load/store for a `Simd<[T; -+ N]>` (this alignment is often much larger than that of `T`). -+ -+There are `_unaligned` versions that use unaligned load and stores, as well as -+`unsafe` `_unchecked` that do not perform any checks iff `debug-assertions = -+false` / `debug = false`. That is, the `_unchecked` methods do still assert size -+and alignment in debug builds and could also do so in release builds depending -+on the configuration options. -+ -+These assertions do often significantly impact performance and you should be -+aware of them. 
-diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md b/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md -new file mode 100644 -index 000000000000..2237c67ec4b3 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md -@@ -0,0 +1,8 @@ -+# Approximate functions -+ -+ -diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md b/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md -new file mode 100644 -index 000000000000..357748383d63 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md -@@ -0,0 +1,6 @@ -+# Fused Multiply Add -+ -+ -diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md b/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md -new file mode 100644 -index 000000000000..711fcc4fd598 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md -@@ -0,0 +1,3 @@ -+# Floating-point math -+ -+This chapter contains information pertaining to working with floating-point numbers. -diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md b/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md -new file mode 100644 -index 000000000000..266c2531cc04 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md -@@ -0,0 +1,7 @@ -+# Short Vector Math Library -+ -+ -diff --git a/third_party/rust/packed_simd/perf-guide/src/introduction.md b/third_party/rust/packed_simd/perf-guide/src/introduction.md -new file mode 100644 -index 000000000000..7243e19c8a54 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/introduction.md -@@ -0,0 +1,26 @@ -+# Introduction -+ -+## What is SIMD -+ -+ -+ -+## History of SIMD in Rust -+ -+ -+ -+## Discover packed_simd -+ -+ -+ -+Writing fast and portable SIMD algorithms using `packed_simd` is, unfortunately, -+not trivial. 
There are many pitfals that one should be aware of, and some idioms -+that help avoid those pitfalls. -+ -+This book attempts to document these best practices and provides practical examples -+on how to apply the tips to _your_ code. -diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/linux.md b/third_party/rust/packed_simd/perf-guide/src/prof/linux.md -new file mode 100644 -index 000000000000..96c7d67bc476 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/prof/linux.md -@@ -0,0 +1,107 @@ -+# Performance profiling on Linux -+ -+## Using `perf` -+ -+[perf](https://perf.wiki.kernel.org/) is the most powerful performance profiler -+for Linux, featuring support for various hardware Performance Monitoring Units, -+as well as integration with the kernel's performance events framework. -+ -+We will only look at how can the `perf` command can be used to profile SIMD code. -+Full system profiling is outside of the scope of this book. -+ -+### Recording -+ -+The first step is to record a program's execution during an average workload. -+It helps if you can isolate the parts of your program which have performance -+issues, and set up a benchmark which can be easily (re)run. -+ -+Build the benchmark binary in release mode, after having enabled debug info: -+ -+```sh -+$ cargo build --release -+Finished release [optimized + debuginfo] target(s) in 0.02s -+``` -+ -+Then use the `perf record` subcommand: -+ -+```sh -+$ perf record --call-graph=dwarf ./target/release/my-program -+[ perf record: Woken up 10 times to write data ] -+[ perf record: Captured and wrote 2,356 MB perf.data (292 samples) ] -+``` -+ -+Instead of using `--call-graph=dwarf`, which can become pretty slow, you can use -+`--call-graph=lbr` if you have a processor with support for Last Branch Record -+(i.e. Intel Haswell and newer). -+ -+`perf` will, by default, record the count of CPU cycles it takes to execute -+various parts of your program. 
You can use the `-e` command line option -+to enable other performance events, such as `cache-misses`. Use `perf list` -+to get a list of all hardware counters supported by your CPU. -+ -+### Viewing the report -+ -+The next step is getting a bird's eye view of the program's execution. -+`perf` provides a `ncurses`-based interface which will get you started. -+ -+Use `perf report` to open a visualization of your program's performance: -+ -+```sh -+perf report --hierarchy -M intel -+``` -+ -+`--hierarchy` will display a tree-like structure of where your program spent -+most of its time. `-M intel` enables disassembly output with Intel syntax, which -+is subjectively more readable than the default AT&T syntax. -+ -+Here is the output from profiling the `nbody` benchmark: -+ -+``` -+- 100,00% nbody -+ - 94,18% nbody -+ + 93,48% [.] nbody_lib::simd::advance -+ + 0,70% [.] nbody_lib::run -+ + 5,06% libc-2.28.so -+``` -+ -+If you move with the arrow keys to any node in the tree, you can the press `a` -+to have `perf` _annotate_ that node. This means it will: -+ -+- disassemble the function -+ -+- associate every instruction with the percentage of time which was spent executing it -+ -+- interleaves the disassembly with the source code, -+ assuming it found the debug symbols -+ (you can use `s` to toggle this behaviour) -+ -+`perf` will, by default, open the instruction which it identified as being the -+hottest spot in the function: -+ -+``` -+0,76 │ movapd xmm2,xmm0 -+0,38 │ movhlps xmm2,xmm0 -+ │ addpd xmm2,xmm0 -+ │ unpcklpd xmm1,xmm2 -+12,50 │ sqrtpd xmm0,xmm1 -+1,52 │ mulpd xmm0,xmm1 -+``` -+ -+In this case, `sqrtpd` will be highlighted in red, since that's the instruction -+which the CPU spends most of its time executing. -+ -+## Using Valgrind -+ -+Valgrind is a set of tools which initially helped C/C++ programmers find unsafe -+memory accesses in their code. 
Nowadays the project also has -+ -+- a heap profiler called `massif` -+ -+- a cache utilization profiler called `cachegrind` -+ -+- a call-graph performance profiler called `callgrind` -+ -+ -diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/mca.md b/third_party/rust/packed_simd/perf-guide/src/prof/mca.md -new file mode 100644 -index 000000000000..65ddf1a4eb3a ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/prof/mca.md -@@ -0,0 +1,100 @@ -+# Machine code analysis tools -+ -+## The microarchitecture of modern CPUs -+ -+While you might have heard of Instruction Set Architectures, such as `x86` or -+`arm` or `mips`, the term _microarchitecture_ (also written here as _µ-arch_), -+refers to the internal details of an actual family of CPUs, such as Intel's -+_Haswell_ or AMD's _Jaguar_. -+ -+Replacing scalar code with SIMD code will improve performance on all CPUs -+supporting the required vector extensions. -+However, due to microarchitectural differences, the actual speed-up at -+runtime might vary. -+ -+**Example**: a simple example arises when optimizing for AMD K8 CPUs. -+The assembly generated for an empty function should look like this: -+ -+```asm -+nop -+ret -+``` -+ -+The `nop` is used to align the `ret` instruction for better performance. -+However, the compiler will actually generated the following code: -+ -+```asm -+repz ret -+``` -+ -+The `repz` instruction will repeat the following instruction until a certain -+condition. Of course, in this situation, the function will simply immediately -+return, and the `ret` instruction is still aligned. -+However, AMD K8's branch predictor performs better with the latter code. -+ -+For those looking to absolutely maximize performance for a certain target µ-arch, -+you will have to read some CPU manuals, or ask the compiler to do it for you -+with `-C target-cpu`. 
-+ -+### Summary of CPU internals -+ -+Modern processors are able to execute instructions out-of-order for better performance, -+by utilizing tricks such as [branch prediction], [instruction pipelining], -+or [superscalar execution]. -+ -+[branch prediction]: https://en.wikipedia.org/wiki/Branch_predictor -+[instruction pipelining]: https://en.wikipedia.org/wiki/Instruction_pipelining -+[superscalar execution]: https://en.wikipedia.org/wiki/Superscalar_processor -+ -+SIMD instructions are also subject to these optimizations, meaning it can get pretty -+difficult to determine where the slowdown happens. -+For example, if the profiler reports a store operation is slow, one of two things -+could be happening: -+ -+- the store is limited by the CPU's memory bandwidth, which is actually an ideal -+ scenario, all things considered; -+ -+- memory bandwidth is nowhere near its peak, but the value to be stored is at the -+ end of a long chain of operations, and this store is where the profiler -+ encountered the pipeline stall; -+ -+Since most profilers are simple tools which don't understand the subtleties of -+instruction scheduling, you -+ -+## Analyzing the machine code -+ -+Certain tools have knowledge of internal CPU microarchitecture, i.e. they know -+ -+- how many physical [register files] a CPU actually has -+ -+- what is the latency / throughtput of an instruction -+ -+- what [µ-ops] are generated for a set of instructions -+ -+and many other architectural details. -+ -+[register files]: https://en.wikipedia.org/wiki/Register_file -+[µ-ops]: https://en.wikipedia.org/wiki/Micro-operation -+ -+These tools are therefore able to provide accurate information as to why some -+instructions are inefficient, and where the bottleneck is. -+ -+The disadvantage is that the output of these tools requires advanced knowledge -+of the target architecture to understand, i.e. they **cannot** point out what -+the cause of the issue is explicitly. 
-+ -+## Intel's Architecture Code Analyzer (IACA) -+ -+[IACA] is a free tool offered by Intel for analyzing the performance of various -+computational kernels. -+ -+Being a proprietary, closed source tool, it _only_ supports Intel's µ-arches. -+ -+[IACA]: https://software.intel.com/en-us/articles/intel-architecture-code-analyzer -+ -+## llvm-mca -+ -+ -diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md b/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md -new file mode 100644 -index 000000000000..02ba78d2f22f ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md -@@ -0,0 +1,14 @@ -+# Performance profiling -+ -+While the rest of the book provides practical advice on how to improve the performance -+of SIMD code, this chapter is dedicated to [**performance profiling**][profiling]. -+Profiling consists of recording a program's execution in order to identify program -+hotspots. -+ -+**Important**: most profilers require debug information in order to accurately -+link the program hotspots back to the corresponding source code lines. Rust will -+disable debug info generation by default for optimized builds, but you can change -+that [in your `Cargo.toml`][cargo-ref]. 
-+ -+[profiling]: https://en.wikipedia.org/wiki/Profiling_(computer_programming) -+[cargo-ref]: https://doc.rust-lang.org/cargo/reference/manifest.html#the-profile-sections -diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md -new file mode 100644 -index 000000000000..ee670fea5bd8 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md -@@ -0,0 +1,5 @@ -+# The `target_feature` attribute -+ -+ -diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md -new file mode 100644 -index 000000000000..b93030ca6708 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md -@@ -0,0 +1,13 @@ -+# Enabling target features -+ -+Not all processors of a certain architecture will have SIMD processing units, -+and using a SIMD instruction which is not supported will trigger undefined behavior. -+ -+To allow building safe, portable programs, the Rust compiler will **not**, by default, -+generate any sort of vector instructions, unless it can statically determine -+they are supported. For example, on AMD64, SSE2 support is architecturally guaranteed. -+The `x86_64-apple-darwin` target enables up to SSSE3. To get a definitive list of -+which features are enabled by default on various platforms, refer to the target -+specifications [in the compiler's source code][targets].
-+ -+[targets]: https://github.com/rust-lang/rust/tree/master/src/librustc_target/spec -diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md -new file mode 100644 -index 000000000000..86705102a74b ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md -@@ -0,0 +1,5 @@ -+# Inlining -+ -+ -diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md -new file mode 100644 -index 000000000000..5b55c61c268a ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md -@@ -0,0 +1,31 @@ -+# Target features in practice -+ -+Using `RUSTFLAGS` will allow the crate being compiled, as well as all its -+transitive dependencies to use certain target features. -+ -+A technique used to avoid undefined behavior at runtime is to compile and -+ship multiple binaries, each compiled with a certain set of features. -+This might not be feasible in some cases, and can quickly get out of hand -+as more and more vector extensions are added to an architecture. -+ -+Rust can be more flexible: you can build a single binary/library which automatically -+picks the best supported vector instructions depending on the host machine. -+The trick consists of monomorphizing parts of the code during building, and then -+using run-time feature detection to select the right code path when running. -+ -+ -+ -+**NOTE** (x86 specific): because the AVX (256-bit) registers extend the existing -+SSE (128-bit) registers, mixing SSE and AVX instructions in a program can cause -+performance issues. -+ -+The solution is to compile all code, even the code written with 128-bit vectors, -+with the AVX target feature enabled. This will cause the compiler to prefix the -+generated instructions with the [VEX] prefix.
-+ -+[VEX]: https://en.wikipedia.org/wiki/VEX_prefix -diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md -new file mode 100644 -index 000000000000..47ddcc8660db ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md -@@ -0,0 +1,5 @@ -+# Detecting host features at runtime -+ -+ -diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md -new file mode 100644 -index 000000000000..e2e806e085b6 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md -@@ -0,0 +1,77 @@ -+# Using RUSTFLAGS -+ -+One of the easiest ways to benefit from SIMD is to allow the compiler -+to generate code using certain vector instruction extensions. -+ -+The environment variable `RUSTFLAGS` can be used to pass options for code -+generation to the Rust compiler. These flags will affect **all** compiled crates. -+ -+There are two flags which can be used to enable specific vector extensions: -+ -+## target-feature -+ -+- Syntax: `-C target-feature=<features>` -+ -+- Provides the compiler with a comma-separated set of instruction extensions -+ to enable. -+ -+ **Example**: Use `-C target-feature=+sse3,+avx` to enable generating instructions -+ for [Streaming SIMD Extensions 3](https://en.wikipedia.org/wiki/SSE3) and -+ [Advanced Vector Extensions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions). -+ -+- To list target triples for all targets supported by Rust, use: -+ -+ ```sh -+ rustc --print target-list -+ ``` -+ -+- To list all supported target features for a certain target triple, use: -+ -+ ```sh -+ rustc --target=${TRIPLE} --print target-features -+ ``` -+ -+- Note that all CPU features are independent, and will have to be enabled individually.
-+ -+ **Example**: Setting `-C target-feature=+avx2` will _not_ enable `fma`, even though -+ all CPUs which support AVX2 also support FMA. To enable both, one has to use -+ `-C target-feature=+avx2,+fma` -+ -+- Some features also depend on other features, which need to be enabled for the -+ target instructions to be generated. -+ -+ **Example**: Unless `v7` is specified as the target CPU (see below), to enable -+ NEON on ARM it is necessary to use `-C target-feature=+v7,+neon`. -+ -+## target-cpu -+ -+- Syntax: `-C target-cpu=<cpu>` -+ -+- Sets the identifier of a CPU family / model for which to build and optimize the code. -+ -+ **Example**: `RUSTFLAGS='-C target-cpu=cortex-a75'` -+ -+- To list all supported target CPUs for a certain target triple, use: -+ -+ ```sh -+ rustc --target=${TRIPLE} --print target-cpus -+ ``` -+ -+ **Example**: -+ -+ ```sh -+ rustc --target=i686-pc-windows-msvc --print target-cpus -+ ``` -+ -+- The compiler will translate this into a list of target features. Therefore, -+ individual feature checks (`#[cfg(target_feature = "...")]`) will still -+ work properly. -+ -+- It will cause the code generator to optimize the generated code for that -+ specific CPU model. -+ -+- Using `native` as the CPU model will cause Rust to generate and optimize code -+ for the CPU running the compiler. It is useful when building programs which you -+ plan to only use locally. This should never be used when the generated programs -+ are meant to be run on other computers, such as when packaging for distribution -+ or cross-compiling. -diff --git a/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md b/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md -new file mode 100644 -index 000000000000..d0dd1be12a19 ---- /dev/null -+++ b/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md -@@ -0,0 +1,76 @@ -+# Vertical and horizontal operations -+ -+In SIMD terminology, each vector has a certain "width" (number of lanes).
-+A vector processor is able to perform two kinds of operations on a vector: -+ -+- Vertical operations: -+ operate on two vectors of the same width, result has same width -+ -+**Example**: vertical addition of two `f32x4` vectors -+ -+ %0 == | 2 | -3.5 | 0 | 7 | -+ + + + + -+ %1 == | 4 | 1.5 | -1 | 0 | -+ = = = = -+ %0 + %1 == | 6 | -2 | -1 | 7 | -+ -+- Horizontal operations: -+ reduce the elements of two vectors in some way, -+ the result's elements combine information from the two original ones -+ -+**Example**: horizontal addition of two `u64x2` vectors -+ -+ %0 == | 1 | 3 | -+ └─+───┘ -+ └───────┐ -+ │ -+ %1 == | 4 | -1 | │ -+ └─+──┘ │ -+ └───┐ │ -+ │ │ -+ ┌─────│───┘ -+ ▼ ▼ -+ %0 + %1 == | 4 | 3 | -+ -+## Performance consideration of horizontal operations -+ -+The result of vertical operations, like vector negation: `-a`, for a given lane, -+does not depend on the result of the operation for the other lanes. The result -+of horizontal operations, like the vector `sum` reduction: `a.sum()`, depends on -+the value of all vector lanes. -+ -+In virtually all architectures vertical operations are fast, while horizontal -+operations are, by comparison, very slow. -+ -+Consider the following two functions for computing the sum of all `f32` values -+in a slice: -+ -+```rust -+fn fast_sum(x: &[f32]) -> f32 { -+ assert!(x.len() % 4 == 0); -+ let mut sum = f32x4::splat(0.); // [0., 0., 0., 0.] -+ for i in (0..x.len()).step_by(4) { -+ sum += f32x4::from_slice_unaligned(&x[i..]); -+ } -+ sum.sum() -+} -+ -+fn slow_sum(x: &[f32]) -> f32 { -+ assert!(x.len() % 4 == 0); -+ let mut sum: f32 = 0.; -+ for i in (0..x.len()).step_by(4) { -+ sum += f32x4::from_slice_unaligned(&x[i..]).sum(); -+ } -+ sum -+} -+``` -+ -+The inner loop over the slice is where the bulk of the work actually happens. 
-+There, the `fast_sum` function performs vertical operations into a vector, doing -+a single horizontal reduction at the end, while the `slow_sum` function performs -+horizontal vector operations inside of the loop. -+ -+On all widely-used architectures, `fast_sum` is a large constant factor faster -+than `slow_sum`. You can run the [slice_sum]() example and see for yourself. On -+the particular machine tested there the algorithm using the horizontal vector -+addition is 2.7x slower than the one using vertical vector operations! -diff --git a/third_party/rust/packed_simd/readme.md b/third_party/rust/packed_simd/readme.md -new file mode 100644 -index 000000000000..3b27a2bba0d6 ---- /dev/null -+++ b/third_party/rust/packed_simd/readme.md -@@ -0,0 +1,182 @@ -+# `Simd<[T; N]>` -+ -+## Implementation of [Rust RFC #2366: `std::simd`][rfc2366] -+ -+[![Travis-CI Status]][travis] [![Appveyor Status]][appveyor] [![Latest Version]][crates.io] [![docs]][master_docs] -+ -+> This aims to be a 100% conforming implementation of Rust RFC 2366 for stabilization. -+ -+**WARNING**: this crate only supports the most recent nightly Rust toolchain. -+ -+## Documentation -+ -+* [API docs (`master` branch)][master_docs] -+* [Performance guide][perf_guide] -+* [API docs (`docs.rs`)][docs.rs]: **CURRENTLY DOWN** due to -+ https://github.com/rust-lang-nursery/packed_simd/issues/110 -+* [RFC2366 `std::simd`][rfc2366]: - contains motivation, design rationale, -+ discussion, etc. -+ -+## Examples -+ -+Most of the examples come with both a scalar and a vectorized implementation.
-+ -+* [`aobench`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench) -+* [`fannkuch_redux`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/fannkuch_redux) -+* [`matrix inverse`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/matrix_inverse) -+* [`mandelbrot`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/mandelbrot) -+* [`n-body`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/nbody) -+* [`options_pricing`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/options_pricing) -+* [`spectral_norm`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/spectral_norm) -+* [`triangle transform`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/triangle_xform) -+* [`stencil`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/stencil) -+* [`vector dot product`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/dot_product) -+ -+## Cargo features -+ -+* `into_bits` (default: disabled): enables `FromBits`/`IntoBits` trait -+ implementations for the vector types. These allow reinterpreting the bits of a -+ vector type as those of another vector type safely by just using the -+ `.into_bits()` method. -+ -+* `core_arch` (default: disabled): enable this feature to recompile `core::arch` -+ for the target-features enabled. `packed_simd` includes optimizations for some -+ target feature combinations that are enabled by this feature. Note, however, -+ that this is an unstable dependency, that rustc might break at any time. -+ -+* `sleef-sys` (default: disabled - `x86_64` only): internally uses the [SLEEF] -+ short-vector math library when profitable via the [`sleef-sys`][sleef_sys] -+ crate. [SLEEF] is licensed under the [Boost Software License -+ v1.0][boost_license], an extremely permissive license, and can be statically -+ linked without issues. 
-+ -+## Performance -+ -+The following [ISPC] examples are also part of `packed_simd`'s -+[`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/) -+directory, where `packed_simd`+[`rayon`][rayon] are used to emulate [ISPC]'s -+Single-Program-Multiple-Data (SPMD) programming model. The performance results -+on different hardware is shown in the `readme.md` of each example. The following -+table summarizes the performance ranges, where `+` means speed-up and `-` -+slowdown: -+ -+* `aobench`: `[-1.02x, +1.53x]`, -+* `stencil`: `[+1.06x, +1.72x]`, -+* `mandelbrot`: `[-1.74x, +1.2x]`, -+* `options_pricing`: -+ * `black_scholes`: `+1.0x` -+ * `binomial_put`: `+1.4x` -+ -+ While SPMD is not the intended use case for `packed_simd`, it is possible to -+ combine the library with [`rayon`][rayon] to poorly emulate [ISPC]'s SPMD programming -+ model in Rust. Writing performant code is not as straightforward as with -+ [ISPC], but with some care (e.g. see the [Performance Guide][perf_guide]) one -+ can easily match and often out-perform [ISPC]'s "default performance". -+ -+## Platform support -+ -+The following table describes the supported platforms: `build` shows whether the -+library compiles without issues for a given target, while `run` shows whether -+the full testsuite passes on the target. 
-+ -+| Linux targets: | build | run | -+|-----------------------------------|-----------|---------| -+| `i586-unknown-linux-gnu` | ✓ | ✓ | -+| `i686-unknown-linux-gnu` | ✓ | ✓ | -+| `x86_64-unknown-linux-gnu` | ✓ | ✓ | -+| `arm-unknown-linux-gnueabi` | ✗ | ✗ | -+| `arm-unknown-linux-gnueabihf` | ✓ | ✓ | -+| `armv7-unknown-linux-gnueabi` | ✓ | ✓ | -+| `aarch64-unknown-linux-gnu` | ✓ | ✓ | -+| `mips-unknown-linux-gnu` | ✓ | ✓ | -+| `mipsel-unknown-linux-musl` | ✓ | ✓ | -+| `mips64-unknown-linux-gnuabi64` | ✓ | ✓ | -+| `mips64el-unknown-linux-gnuabi64` | ✓ | ✓ | -+| `powerpc-unknown-linux-gnu` | ✗ | ✗ | -+| `powerpc64-unknown-linux-gnu` | ✗ | ✗ | -+| `powerpc64le-unknown-linux-gnu` | ✗ | ✗ | -+| `s390x-unknown-linux-gnu` | ✓ | ✓* | -+| `sparc64-unknown-linux-gnu` | ✓ | ✓* | -+| `thumbv7neon-unknown-linux-gnueabihf` | ✓ | ✓ | -+| **MacOSX targets:** | **build** | **run** | -+| `x86_64-apple-darwin` | ✓ | ✓ | -+| `i686-apple-darwin` | ✓ | ✓ | -+| **Windows targets:** | **build** | **run** | -+| `x86_64-pc-windows-msvc` | ✓ | ✓ | -+| `i686-pc-windows-msvc` | ✓ | ✓ | -+| `x86_64-pc-windows-gnu` | ✗ | ✗ | -+| `i686-pc-windows-gnu` | ✗ | ✗ | -+| **WebAssembly targets:** | **build** | **run** | -+| `wasm32-unknown-unknown` | ✓ | ✓ | -+| **Android targets:** | **build** | **run** | -+| `x86_64-linux-android` | ✓ | ✓ | -+| `arm-linux-androideabi` | ✓ | ✓ | -+| `aarch64-linux-android` | ✓ | ✗ | -+| `thumbv7neon-linux-androideabi` | ✓ | ✓ | -+| **iOS targets:** | **build** | **run** | -+| `i386-apple-ios` | ✓ | ✗ | -+| `x86_64-apple-ios` | ✓ | ✗ | -+| `armv7-apple-ios` | ✓ | ✗** | -+| `aarch64-apple-ios` | ✓ | ✗** | -+| **xBSD targets:** | **build** | **run** | -+| `i686-unknown-freebsd` | ✗ | ✗** | -+| `x86_64-unknown-freebsd` | ✗ | ✗** | -+| `x86_64-unknown-netbsd` | ✗ | ✗** | -+| **Solaris targets:** | **build** | **run** | -+| `x86_64-sun-solaris` | ✗ | ✗** | -+ -+[*] most of the test suite passes correctly on these platform but -+there are correctness bugs open in the issue 
tracker. -+ -+[**] it is currently not easily possible to run these platforms on CI. -+ -+## Machine code verification -+ -+The -+[`verify/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/verify) -+crate disassembles the portable packed vector APIs at run-time and -+compares the generated machine code against the desired one to make sure that -+this crate remains efficient. -+ -+## License -+ -+This project is licensed under either of -+ -+* [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) -+ ([LICENSE-APACHE](LICENSE-APACHE)) -+ -+* [MIT License](http://opensource.org/licenses/MIT) -+ ([LICENSE-MIT](LICENSE-MIT)) -+ -+at your option. -+ -+## Contributing -+ -+We welcome all people who want to contribute. -+Please see the [contributing instructions] for more information. -+ -+Contributions in any form (issues, pull requests, etc.) to this project -+must adhere to Rust's [Code of Conduct]. -+ -+Unless you explicitly state otherwise, any contribution intentionally submitted -+for inclusion in `packed_simd` by you, as defined in the Apache-2.0 license, shall be -+dual licensed as above, without any additional terms or conditions.
-+ -+[travis]: https://travis-ci.org/rust-lang-nursery/packed_simd -+[Travis-CI Status]: https://travis-ci.org/rust-lang-nursery/packed_simd.svg?branch=master -+[appveyor]: https://ci.appveyor.com/project/gnzlbg/packed-simd -+[Appveyor Status]: https://ci.appveyor.com/api/projects/status/hd7v9dvr442hgdix?svg=true -+[Latest Version]: https://img.shields.io/crates/v/packed_simd.svg -+[crates.io]: https://crates.io/crates/packed_simd -+[docs]: https://docs.rs/packed_simd/badge.svg -+[docs.rs]: https://docs.rs/packed_simd/ -+[master_docs]: https://rust-lang-nursery.github.io/packed_simd/packed_simd/ -+[perf_guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/ -+[rfc2366]: https://github.com/rust-lang/rfcs/pull/2366 -+[ISPC]: https://ispc.github.io/ -+[rayon]: https://crates.io/crates/rayon -+[boost_license]: https://www.boost.org/LICENSE_1_0.txt -+[SLEEF]: https://sleef.org/ -+[sleef_sys]: https://crates.io/crates/sleef-sys -+[contributing instructions]: contributing.md -+[Code of Conduct]: https://www.rust-lang.org/en-US/conduct.html -diff --git a/third_party/rust/packed_simd/rustfmt.toml b/third_party/rust/packed_simd/rustfmt.toml -new file mode 100644 -index 000000000000..5b400a4ce440 ---- /dev/null -+++ b/third_party/rust/packed_simd/rustfmt.toml -@@ -0,0 +1,7 @@ -+max_width = 79 -+use_small_heuristics = "Max" -+wrap_comments = true -+comment_width = 79 -+fn_args_density = "Compressed" -+edition = "2018" -+error_on_line_overflow = true -\ No newline at end of file -diff --git a/third_party/rust/packed_simd/src/api.rs b/third_party/rust/packed_simd/src/api.rs -new file mode 100644 -index 000000000000..9959a052ae96 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api.rs -@@ -0,0 +1,301 @@ -+//! 
Implements the Simd<[T; N]> APIs -+ -+crate mod cast; -+#[macro_use] -+mod cmp; -+#[macro_use] -+mod default; -+#[macro_use] -+mod fmt; -+#[macro_use] -+mod from; -+#[macro_use] -+mod hash; -+#[macro_use] -+mod math; -+#[macro_use] -+mod minimal; -+#[macro_use] -+mod ops; -+#[macro_use] -+mod ptr; -+#[macro_use] -+mod reductions; -+#[macro_use] -+mod select; -+#[macro_use] -+mod shuffle; -+#[macro_use] -+mod shuffle1_dyn; -+#[macro_use] -+mod slice; -+#[macro_use] -+mod swap_bytes; -+#[macro_use] -+mod bit_manip; -+ -+#[cfg(feature = "into_bits")] -+crate mod into_bits; -+ -+macro_rules! impl_i { -+ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident -+ | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),* -+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { -+ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt -+ | $($elem_ids),* | $(#[$doc])*); -+ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_vector_bitwise!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) -+ ); -+ impl_ops_scalar_bitwise!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) -+ ); -+ impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_vector_int_min_max!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt -+ ); -+ impl_reduction_integer_arithmetic!( -+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt -+ ); -+ impl_reduction_min_max!( -+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt -+ ); -+ impl_reduction_bitwise!( -+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt -+ | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0) -+ ); -+ impl_fmt_debug!([$elem_ty; 
$elem_n]: $tuple_id | $test_tt); -+ impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1)); -+ impl_from_vectors!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* -+ ); -+ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_cmp_partial_eq!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1) -+ ); -+ impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); -+ impl_cmp_vertical!( -+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt -+ ); -+ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); -+ -+ test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt); -+ test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ } -+} -+ -+macro_rules! 
impl_u { -+ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident -+ | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),* -+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { -+ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt -+ | $($elem_ids),* | $(#[$doc])*); -+ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_vector_bitwise!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) -+ ); -+ impl_ops_scalar_bitwise!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) -+ ); -+ impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_vector_int_min_max!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt -+ ); -+ impl_reduction_integer_arithmetic!( -+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt -+ ); -+ impl_reduction_min_max!( -+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt -+ ); -+ impl_reduction_bitwise!( -+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt -+ | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0) -+ ); -+ impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1)); -+ impl_from_vectors!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* -+ ); -+ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ 
impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_cmp_partial_eq!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 0) -+ ); -+ impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); -+ impl_cmp_vertical!( -+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt -+ ); -+ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); -+ -+ test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt); -+ test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ } -+} -+ -+macro_rules! impl_f { -+ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident -+ | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),* -+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { -+ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt -+ | $($elem_ids),* | $(#[$doc])*); -+ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_ops_vector_float_min_max!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt -+ ); -+ impl_reduction_float_arithmetic!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_reduction_min_max!( -+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt -+ ); -+ impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 1.)); -+ impl_from_vectors!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* -+ ); -+ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_cmp_partial_eq!( -+ 
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 0.) -+ ); -+ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ -+ impl_float_consts!([$elem_ty; $elem_n]: $tuple_id); -+ impl_float_category!([$elem_ty; $elem_n]: $tuple_id, $mask_ty); -+ -+ // floating-point math -+ impl_math_float_abs!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_math_float_cos!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_math_float_exp!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_math_float_ln!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_math_float_mul_add!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_math_float_mul_adde!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_math_float_powf!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_math_float_recpre!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_math_float_rsqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_math_float_sin!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_math_float_sqrt!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_math_float_sqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_cmp_vertical!( -+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1., 0.) -+ | $test_tt -+ ); -+ -+ test_select!($elem_ty, $mask_ty, $tuple_id, (1., 2.) | $test_tt); -+ test_reduction_float_min_max!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt -+ ); -+ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ } -+} -+ -+macro_rules! 
impl_m { -+ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident | $ielem_ty:ident -+ | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),* -+ | $(#[$doc:meta])*) => { -+ impl_minimal_mask!( -+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt -+ | $($elem_ids),* | $(#[$doc])* -+ ); -+ impl_ops_vector_mask_bitwise!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) -+ ); -+ impl_ops_scalar_mask_bitwise!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) -+ ); -+ impl_reduction_bitwise!( -+ [bool; $elem_n]: $tuple_id | $ielem_ty | $test_tt -+ | (|x|{ x != 0 }) | (true, false) -+ ); -+ impl_reduction_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_fmt_debug!([bool; $elem_n]: $tuple_id | $test_tt); -+ impl_from_array!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt -+ | (crate::$elem_ty::new(true), true) -+ ); -+ impl_from_vectors!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* -+ ); -+ impl_default!([bool; $elem_n]: $tuple_id | $test_tt); -+ impl_cmp_partial_eq!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) -+ ); -+ impl_cmp_eq!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) -+ ); -+ impl_cmp_vertical!( -+ [$elem_ty; $elem_n]: $tuple_id, $tuple_id, true, (true, false) -+ | $test_tt -+ ); -+ impl_select!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ impl_cmp_ord!( -+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (false, true) -+ ); -+ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ -+ test_cmp_partial_ord_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ test_shuffle1_dyn_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); -+ } -+} -+ -+macro_rules! 
impl_const_p { -+ ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident, -+ $usize_ty:ident, $isize_ty:ident -+ | $test_tt:tt | $($elem_ids:ident),* -+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { -+ impl_minimal_p!( -+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty -+ | ref_ | $test_tt | $($elem_ids),* -+ | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])* -+ ); -+ impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); -+ } -+} -+ -+macro_rules! impl_mut_p { -+ ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident, -+ $usize_ty:ident, $isize_ty:ident -+ | $test_tt:tt | $($elem_ids:ident),* -+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { -+ impl_minimal_p!( -+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty -+ | ref_mut_ | $test_tt | $($elem_ids),* -+ | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])* -+ ); -+ impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); -+ impl_ptr_write!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/api/bit_manip.rs b/third_party/rust/packed_simd/src/api/bit_manip.rs -new file mode 100644 -index 000000000000..3d3c4eb8850a ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/bit_manip.rs -@@ -0,0 +1,128 @@ -+//! Bit manipulations. -+ -+macro_rules! impl_bit_manip { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Returns the number of ones in the binary representation of -+ /// the lanes of `self`. -+ #[inline] -+ pub fn count_ones(self) -> Self { -+ super::codegen::bit_manip::BitManip::ctpop(self) -+ } -+ -+ /// Returns the number of zeros in the binary representation of -+ /// the lanes of `self`. -+ #[inline] -+ pub fn count_zeros(self) -> Self { -+ super::codegen::bit_manip::BitManip::ctpop(!self) -+ } -+ -+ /// Returns the number of leading zeros in the binary -+ /// representation of the lanes of `self`. 
-+ #[inline] -+ pub fn leading_zeros(self) -> Self { -+ super::codegen::bit_manip::BitManip::ctlz(self) -+ } -+ -+ /// Returns the number of trailing zeros in the binary -+ /// representation of the lanes of `self`. -+ #[inline] -+ pub fn trailing_zeros(self) -> Self { -+ super::codegen::bit_manip::BitManip::cttz(self) -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item_with_macros! { -+ #[allow(overflowing_literals)] -+ pub mod [<$id _bit_manip>] { -+ use super::*; -+ -+ const LANE_WIDTH: usize = mem::size_of::<$elem_ty>() * 8; -+ -+ macro_rules! test_func { -+ ($x:expr, $func:ident) => {{ -+ let mut actual = $x; -+ for i in 0..$id::lanes() { -+ actual = actual.replace( -+ i, -+ $x.extract(i).$func() as $elem_ty -+ ); -+ } -+ let expected = $x.$func(); -+ assert_eq!(actual, expected); -+ }}; -+ } -+ -+ const BYTES: [u8; 64] = [ -+ 0, 1, 2, 3, 4, 5, 6, 7, -+ 8, 9, 10, 11, 12, 13, 14, 15, -+ 16, 17, 18, 19, 20, 21, 22, 23, -+ 24, 25, 26, 27, 28, 29, 30, 31, -+ 32, 33, 34, 35, 36, 37, 38, 39, -+ 40, 41, 42, 43, 44, 45, 46, 47, -+ 48, 49, 50, 51, 52, 53, 54, 55, -+ 56, 57, 58, 59, 60, 61, 62, 63, -+ ]; -+ -+ fn load_bytes() -> $id { -+ let elems: &mut [$elem_ty] = unsafe { -+ slice::from_raw_parts_mut( -+ BYTES.as_mut_ptr() as *mut $elem_ty, -+ $id::lanes(), -+ ) -+ }; -+ $id::from_slice_unaligned(elems) -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn count_ones() { -+ test_func!($id::splat(0), count_ones); -+ test_func!($id::splat(!0), count_ones); -+ test_func!(load_bytes(), count_ones); -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn count_zeros() { -+ test_func!($id::splat(0), count_zeros); -+ test_func!($id::splat(!0), count_zeros); -+ test_func!(load_bytes(), count_zeros); -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn leading_zeros() { -+ 
test_func!($id::splat(0), leading_zeros); -+ test_func!($id::splat(1), leading_zeros); -+ // some implementations use `pshufb` which has unique -+ // behavior when the 8th bit is set. -+ test_func!($id::splat(0b1000_0010), leading_zeros); -+ test_func!($id::splat(!0), leading_zeros); -+ test_func!( -+ $id::splat(1 << (LANE_WIDTH - 1)), -+ leading_zeros -+ ); -+ test_func!(load_bytes(), leading_zeros); -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn trailing_zeros() { -+ test_func!($id::splat(0), trailing_zeros); -+ test_func!($id::splat(1), trailing_zeros); -+ test_func!($id::splat(0b1000_0010), trailing_zeros); -+ test_func!($id::splat(!0), trailing_zeros); -+ test_func!( -+ $id::splat(1 << (LANE_WIDTH - 1)), -+ trailing_zeros -+ ); -+ test_func!(load_bytes(), trailing_zeros); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/cast.rs b/third_party/rust/packed_simd/src/api/cast.rs -new file mode 100644 -index 000000000000..f1c32ca1a38b ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cast.rs -@@ -0,0 +1,108 @@ -+//! Implementation of `FromCast` and `IntoCast`. -+#![allow(clippy::module_name_repetitions)] -+ -+/// Numeric cast from `T` to `Self`. -+/// -+/// > Note: This is a temporary workaround until the conversion traits -+/// specified > in [RFC2484] are implemented. -+/// -+/// Numeric cast between vectors with the same number of lanes, such that: -+/// -+/// * casting integer vectors whose lane types have the same size (e.g. `i32xN` -+/// -> `u32xN`) is a **no-op**, -+/// -+/// * casting from a larger integer to a smaller integer (e.g. `u32xN` -> -+/// `u8xN`) will **truncate**, -+/// -+/// * casting from a smaller integer to a larger integer (e.g. 
`u8xN` -> -+/// `u32xN`) will: -+/// * **zero-extend** if the source is unsigned, or -+/// * **sign-extend** if the source is signed, -+/// -+/// * casting from a float to an integer will **round the float towards zero**, -+/// -+/// * casting from an integer to float will produce the floating point -+/// representation of the integer, **rounding to nearest, ties to even**, -+/// -+/// * casting from an `f32` to an `f64` is perfect and lossless, -+/// -+/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. -+/// -+/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484 -+pub trait FromCast: crate::marker::Sized { -+ /// Numeric cast from `T` to `Self`. -+ fn from_cast(_: T) -> Self; -+} -+ -+/// Numeric cast from `Self` to `T`. -+/// -+/// > Note: This is a temporary workaround until the conversion traits -+/// specified > in [RFC2484] are implemented. -+/// -+/// Numeric cast between vectors with the same number of lanes, such that: -+/// -+/// * casting integer vectors whose lane types have the same size (e.g. `i32xN` -+/// -> `u32xN`) is a **no-op**, -+/// -+/// * casting from a larger integer to a smaller integer (e.g. `u32xN` -> -+/// `u8xN`) will **truncate**, -+/// -+/// * casting from a smaller integer to a larger integer (e.g. `u8xN` -> -+/// `u32xN`) will: -+/// * **zero-extend** if the source is unsigned, or -+/// * **sign-extend** if the source is signed, -+/// -+/// * casting from a float to an integer will **round the float towards zero**, -+/// -+/// * casting from an integer to float will produce the floating point -+/// representation of the integer, **rounding to nearest, ties to even**, -+/// -+/// * casting from an `f32` to an `f64` is perfect and lossless, -+/// -+/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. -+/// -+/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484 -+pub trait Cast: crate::marker::Sized { -+ /// Numeric cast from `self` to `T`. 
-+ fn cast(self) -> T; -+} -+ -+/// `FromCast` implies `Cast`. -+impl Cast for T -+where -+ U: FromCast, -+{ -+ #[inline] -+ fn cast(self) -> U { -+ U::from_cast(self) -+ } -+} -+ -+/// `FromCast` and `Cast` are reflexive -+impl FromCast for T { -+ #[inline] -+ fn from_cast(t: Self) -> Self { -+ t -+ } -+} -+ -+#[macro_use] -+mod macros; -+ -+mod v16; -+pub use self::v16::*; -+ -+mod v32; -+pub use self::v32::*; -+ -+mod v64; -+pub use self::v64::*; -+ -+mod v128; -+pub use self::v128::*; -+ -+mod v256; -+pub use self::v256::*; -+ -+mod v512; -+pub use self::v512::*; -diff --git a/third_party/rust/packed_simd/src/api/cast/macros.rs b/third_party/rust/packed_simd/src/api/cast/macros.rs -new file mode 100644 -index 000000000000..3bb29f0b80b7 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cast/macros.rs -@@ -0,0 +1,82 @@ -+//! Macros implementing `FromCast` -+ -+macro_rules! impl_from_cast_ { -+ ($id:ident[$test_tt:tt]: $from_ty:ident) => { -+ impl crate::api::cast::FromCast<$from_ty> for $id { -+ #[inline] -+ fn from_cast(x: $from_ty) -> Self { -+ use crate::llvm::simd_cast; -+ debug_assert_eq!($from_ty::lanes(), $id::lanes()); -+ Simd(unsafe { simd_cast(x.0) }) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _from_cast_ $from_ty>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn test() { -+ assert_eq!($id::lanes(), $from_ty::lanes()); -+ } -+ } -+ } -+ } -+ }; -+} -+ -+macro_rules! impl_from_cast { -+ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { -+ $( -+ impl_from_cast_!($id[$test_tt]: $from_ty); -+ )* -+ } -+} -+ -+macro_rules! 
impl_from_cast_mask_ { -+ ($id:ident[$test_tt:tt]: $from_ty:ident) => { -+ impl crate::api::cast::FromCast<$from_ty> for $id { -+ #[inline] -+ fn from_cast(x: $from_ty) -> Self { -+ debug_assert_eq!($from_ty::lanes(), $id::lanes()); -+ x.ne($from_ty::default()) -+ .select($id::splat(true), $id::splat(false)) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _from_cast_ $from_ty>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn test() { -+ assert_eq!($id::lanes(), $from_ty::lanes()); -+ -+ let x = $from_ty::default(); -+ let m: $id = x.cast(); -+ assert!(m.none()); -+ } -+ } -+ } -+ } -+ }; -+} -+ -+macro_rules! impl_from_cast_mask { -+ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { -+ $( -+ impl_from_cast_mask_!($id[$test_tt]: $from_ty); -+ )* -+ } -+} -+ -+#[allow(unused)] -+macro_rules! impl_into_cast { -+ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { -+ $( -+ impl_from_cast_!($from_ty[$test_tt]: $id); -+ )* -+ } -+} -diff --git a/third_party/rust/packed_simd/src/api/cast/v128.rs b/third_party/rust/packed_simd/src/api/cast/v128.rs -new file mode 100644 -index 000000000000..78c07f3a5597 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cast/v128.rs -@@ -0,0 +1,79 @@ -+//! 
`FromCast` and `IntoCast` implementations for portable 128-bit wide vectors -+#![rustfmt::skip] -+ -+use crate::*; -+ -+impl_from_cast!( -+ i8x16[test_v128]: u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 -+); -+impl_from_cast!( -+ u8x16[test_v128]: i8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 -+); -+impl_from_cast_mask!( -+ m8x16[test_v128]: i8x16, u8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 -+); -+ -+impl_from_cast!( -+ i16x8[test_v128]: i8x8, u8x8, m8x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, -+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 -+); -+impl_from_cast!( -+ u16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, m16x8, i32x8, u32x8, f32x8, m32x8, -+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 -+); -+impl_from_cast_mask!( -+ m16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, u16x8, i32x8, u32x8, f32x8, m32x8, -+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 -+); -+ -+impl_from_cast!( -+ i32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+impl_from_cast!( -+ u32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+impl_from_cast!( -+ f32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+impl_from_cast_mask!( -+ m32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+ -+impl_from_cast!( -+ i64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -+impl_from_cast!( -+ u64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, 
m32x2, -+ i64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -+impl_from_cast!( -+ f64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -+impl_from_cast_mask!( -+ m64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -+ -+impl_from_cast!( -+ isizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, usizex2, msizex2 -+); -+impl_from_cast!( -+ usizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, msizex2 -+); -+impl_from_cast_mask!( -+ msizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2 -+); -+ -+// FIXME[test_v128]: 64-bit single element vectors into_cast impls -+impl_from_cast!(i128x1[test_v128]: u128x1, m128x1); -+impl_from_cast!(u128x1[test_v128]: i128x1, m128x1); -+impl_from_cast!(m128x1[test_v128]: i128x1, u128x1); -diff --git a/third_party/rust/packed_simd/src/api/cast/v16.rs b/third_party/rust/packed_simd/src/api/cast/v16.rs -new file mode 100644 -index 000000000000..d292936baa41 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cast/v16.rs -@@ -0,0 +1,17 @@ -+//! 
`FromCast` and `IntoCast` implementations for portable 16-bit wide vectors -+#![rustfmt::skip] -+ -+use crate::*; -+ -+impl_from_cast!( -+ i8x2[test_v16]: u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -+impl_from_cast!( -+ u8x2[test_v16]: i8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -+impl_from_cast_mask!( -+ m8x2[test_v16]: i8x2, u8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -diff --git a/third_party/rust/packed_simd/src/api/cast/v256.rs b/third_party/rust/packed_simd/src/api/cast/v256.rs -new file mode 100644 -index 000000000000..0a669e0beebe ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cast/v256.rs -@@ -0,0 +1,81 @@ -+//! `FromCast` and `IntoCast` implementations for portable 256-bit wide vectors -+#![rustfmt::skip] -+ -+use crate::*; -+ -+impl_from_cast!(i8x32[test_v256]: u8x32, m8x32, i16x32, u16x32, m16x32); -+impl_from_cast!(u8x32[test_v256]: i8x32, m8x32, i16x32, u16x32, m16x32); -+impl_from_cast_mask!(m8x32[test_v256]: i8x32, u8x32, i16x32, u16x32, m16x32); -+ -+impl_from_cast!( -+ i16x16[test_v256]: i8x16, u8x16, m8x16, u16x16, m16x16, -+ i32x16, u32x16, f32x16, m32x16 -+); -+impl_from_cast!( -+ u16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, m16x16, -+ i32x16, u32x16, f32x16, m32x16 -+); -+impl_from_cast_mask!( -+ m16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, u16x16, -+ i32x16, u32x16, f32x16, m32x16 -+); -+ -+impl_from_cast!( -+ i32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, u32x8, f32x8, m32x8, -+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 -+); -+impl_from_cast!( -+ u32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, f32x8, m32x8, -+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 -+); -+impl_from_cast!( 
-+ f32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, m32x8, -+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 -+); -+impl_from_cast_mask!( -+ m32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, -+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 -+); -+ -+impl_from_cast!( -+ i64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+impl_from_cast!( -+ u64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+impl_from_cast!( -+ f64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+impl_from_cast_mask!( -+ m64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+ -+impl_from_cast!( -+ i128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -+impl_from_cast!( -+ u128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, m128x2, isizex2, usizex2, msizex2 -+); -+impl_from_cast_mask!( -+ m128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, m64x2, f64x2, i128x2, u128x2, isizex2, usizex2, msizex2 -+); -+ -+impl_from_cast!( -+ isizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, usizex4, msizex4 -+); -+impl_from_cast!( -+ usizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, msizex4 
-+); -+impl_from_cast_mask!( -+ msizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4 -+); -diff --git a/third_party/rust/packed_simd/src/api/cast/v32.rs b/third_party/rust/packed_simd/src/api/cast/v32.rs -new file mode 100644 -index 000000000000..65050cdacb4e ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cast/v32.rs -@@ -0,0 +1,30 @@ -+//! `FromCast` and `IntoCast` implementations for portable 32-bit wide vectors -+#![rustfmt::skip] -+ -+use crate::*; -+ -+impl_from_cast!( -+ i8x4[test_v32]: u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+impl_from_cast!( -+ u8x4[test_v32]: i8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+impl_from_cast_mask!( -+ m8x4[test_v32]: i8x4, u8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+ -+impl_from_cast!( -+ i16x2[test_v32]: i8x2, u8x2, m8x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -+impl_from_cast!( -+ u16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, m16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -+impl_from_cast_mask!( -+ m16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, u16x2, i32x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -diff --git a/third_party/rust/packed_simd/src/api/cast/v512.rs b/third_party/rust/packed_simd/src/api/cast/v512.rs -new file mode 100644 -index 000000000000..9ae1caed35e2 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cast/v512.rs -@@ -0,0 +1,68 @@ -+//! 
`FromCast` and `IntoCast` implementations for portable 512-bit wide vectors -+#![rustfmt::skip] -+ -+use crate::*; -+ -+impl_from_cast!(i8x64[test_v512]: u8x64, m8x64); -+impl_from_cast!(u8x64[test_v512]: i8x64, m8x64); -+impl_from_cast_mask!(m8x64[test_v512]: i8x64, u8x64); -+ -+impl_from_cast!(i16x32[test_v512]: i8x32, u8x32, m8x32, u16x32, m16x32); -+impl_from_cast!(u16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, m16x32); -+impl_from_cast_mask!(m16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, u16x32); -+ -+impl_from_cast!( -+ i32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, u32x16, f32x16, m32x16 -+); -+impl_from_cast!( -+ u32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, f32x16, m32x16 -+); -+impl_from_cast!( -+ f32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, m32x16 -+); -+impl_from_cast_mask!( -+ m32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16 -+); -+ -+impl_from_cast!( -+ i64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, -+ u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 -+); -+impl_from_cast!( -+ u64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, -+ i64x8, f64x8, m64x8, isizex8, usizex8, msizex8 -+); -+impl_from_cast!( -+ f64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, -+ i64x8, u64x8, m64x8, isizex8, usizex8, msizex8 -+); -+impl_from_cast_mask!( -+ m64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, -+ i64x8, u64x8, f64x8, isizex8, usizex8, msizex8 -+); -+ -+impl_from_cast!( -+ i128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+impl_from_cast!( -+ u128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, m128x4, isizex4, usizex4, msizex4 
-+); -+impl_from_cast_mask!( -+ m128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, m64x4, f64x4, i128x4, u128x4, isizex4, usizex4, msizex4 -+); -+ -+impl_from_cast!( -+ isizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, -+ i64x8, u64x8, f64x8, m64x8, usizex8, msizex8 -+); -+impl_from_cast!( -+ usizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, -+ i64x8, u64x8, f64x8, m64x8, isizex8, msizex8 -+); -+impl_from_cast_mask!( -+ msizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, -+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8 -+); -diff --git a/third_party/rust/packed_simd/src/api/cast/v64.rs b/third_party/rust/packed_simd/src/api/cast/v64.rs -new file mode 100644 -index 000000000000..0e2f78f7335b ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cast/v64.rs -@@ -0,0 +1,47 @@ -+//! `FromCast` and `IntoCast` implementations for portable 64-bit wide vectors -+#![rustfmt::skip] -+ -+use crate::*; -+ -+impl_from_cast!( -+ i8x8[test_v64]: u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, -+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 -+); -+impl_from_cast!( -+ u8x8[test_v64]: i8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, -+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 -+); -+impl_from_cast_mask!( -+ m8x8[test_v64]: i8x8, u8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, -+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 -+); -+ -+impl_from_cast!( -+ i16x4[test_v64]: i8x4, u8x4, m8x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+impl_from_cast!( -+ u16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, m16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+impl_from_cast_mask!( -+ m16x4[test_v64]: i8x4, u8x4, m8x4, 
i16x4, u16x4, i32x4, u32x4, f32x4, m32x4, -+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 -+); -+ -+impl_from_cast!( -+ i32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, u32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -+impl_from_cast!( -+ u32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, f32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -+impl_from_cast!( -+ f32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, m32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -+impl_from_cast_mask!( -+ m32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, -+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 -+); -diff --git a/third_party/rust/packed_simd/src/api/cmp.rs b/third_party/rust/packed_simd/src/api/cmp.rs -new file mode 100644 -index 000000000000..6d5301ddddbd ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cmp.rs -@@ -0,0 +1,16 @@ -+//! Implement cmp traits for vector types -+ -+#[macro_use] -+mod partial_eq; -+ -+#[macro_use] -+mod eq; -+ -+#[macro_use] -+mod partial_ord; -+ -+#[macro_use] -+mod ord; -+ -+#[macro_use] -+mod vertical; -diff --git a/third_party/rust/packed_simd/src/api/cmp/eq.rs b/third_party/rust/packed_simd/src/api/cmp/eq.rs -new file mode 100644 -index 000000000000..3c55d0dce57e ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cmp/eq.rs -@@ -0,0 +1,27 @@ -+//! Implements `Eq` for vector types. -+ -+macro_rules! impl_cmp_eq { -+ ( -+ [$elem_ty:ident; $elem_count:expr]: -+ $id:ident | $test_tt:tt | -+ ($true:expr, $false:expr) -+ ) => { -+ impl crate::cmp::Eq for $id {} -+ impl crate::cmp::Eq for LexicographicallyOrdered<$id> {} -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _cmp_eq>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn eq() { -+ fn foo(_: E) {} -+ let a = $id::splat($false); -+ foo(a); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/cmp/ord.rs b/third_party/rust/packed_simd/src/api/cmp/ord.rs -new file mode 100644 -index 000000000000..e54ba3bfde9a ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cmp/ord.rs -@@ -0,0 +1,43 @@ -+//! Implements `Ord` for vector types. -+ -+macro_rules! impl_cmp_ord { -+ ( -+ [$elem_ty:ident; $elem_count:expr]: -+ $id:ident | $test_tt:tt | -+ ($true:expr, $false:expr) -+ ) => { -+ impl $id { -+ /// Returns a wrapper that implements `Ord`. -+ #[inline] -+ pub fn lex_ord(&self) -> LexicographicallyOrdered<$id> { -+ LexicographicallyOrdered(*self) -+ } -+ } -+ -+ impl crate::cmp::Ord for LexicographicallyOrdered<$id> { -+ #[inline] -+ fn cmp(&self, other: &Self) -> crate::cmp::Ordering { -+ match self.partial_cmp(other) { -+ Some(x) => x, -+ None => unsafe { crate::hint::unreachable_unchecked() }, -+ } -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _cmp_ord>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn eq() { -+ fn foo(_: E) {} -+ let a = $id::splat($false); -+ foo(a.partial_lex_ord()); -+ foo(a.lex_ord()); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs -new file mode 100644 -index 000000000000..1712a0de56cb ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs -@@ -0,0 +1,67 @@ -+//! Implements `PartialEq` for vector types. -+ -+macro_rules! 
impl_cmp_partial_eq { -+ ( -+ [$elem_ty:ident; $elem_count:expr]: -+ $id:ident | $test_tt:tt | -+ ($true:expr, $false:expr) -+ ) => { -+ // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 -+ #[allow(clippy::partialeq_ne_impl)] -+ impl crate::cmp::PartialEq<$id> for $id { -+ #[inline] -+ fn eq(&self, other: &Self) -> bool { -+ $id::eq(*self, *other).all() -+ } -+ #[inline] -+ fn ne(&self, other: &Self) -> bool { -+ $id::ne(*self, *other).any() -+ } -+ } -+ -+ // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 -+ #[allow(clippy::partialeq_ne_impl)] -+ impl crate::cmp::PartialEq> -+ for LexicographicallyOrdered<$id> -+ { -+ #[inline] -+ fn eq(&self, other: &Self) -> bool { -+ self.0 == other.0 -+ } -+ #[inline] -+ fn ne(&self, other: &Self) -> bool { -+ self.0 != other.0 -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _cmp_PartialEq>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn partial_eq() { -+ let a = $id::splat($false); -+ let b = $id::splat($true); -+ -+ assert!(a != b); -+ assert!(!(a == b)); -+ assert!(a == a); -+ assert!(!(a != a)); -+ -+ if $id::lanes() > 1 { -+ let a = $id::splat($false).replace(0, $true); -+ let b = $id::splat($true); -+ -+ assert!(a != b); -+ assert!(!(a == b)); -+ assert!(a == a); -+ assert!(!(a != a)); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs -new file mode 100644 -index 000000000000..a2292918bae1 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs -@@ -0,0 +1,234 @@ -+//! Implements `PartialOrd` for vector types. -+//! -+//! This implements a lexicographical order. -+ -+macro_rules! 
impl_cmp_partial_ord { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Returns a wrapper that implements `PartialOrd`. -+ #[inline] -+ pub fn partial_lex_ord(&self) -> LexicographicallyOrdered<$id> { -+ LexicographicallyOrdered(*self) -+ } -+ } -+ -+ impl crate::cmp::PartialOrd> -+ for LexicographicallyOrdered<$id> -+ { -+ #[inline] -+ fn partial_cmp( -+ &self, other: &Self, -+ ) -> Option { -+ if PartialEq::eq(self, other) { -+ Some(crate::cmp::Ordering::Equal) -+ } else if PartialOrd::lt(self, other) { -+ Some(crate::cmp::Ordering::Less) -+ } else if PartialOrd::gt(self, other) { -+ Some(crate::cmp::Ordering::Greater) -+ } else { -+ None -+ } -+ } -+ #[inline] -+ fn lt(&self, other: &Self) -> bool { -+ let m_lt = self.0.lt(other.0); -+ let m_eq = self.0.eq(other.0); -+ for i in 0..$id::lanes() { -+ if m_eq.extract(i) { -+ continue; -+ } -+ return m_lt.extract(i); -+ } -+ false -+ } -+ #[inline] -+ fn le(&self, other: &Self) -> bool { -+ self.lt(other) | PartialEq::eq(self, other) -+ } -+ #[inline] -+ fn ge(&self, other: &Self) -> bool { -+ self.gt(other) | PartialEq::eq(self, other) -+ } -+ #[inline] -+ fn gt(&self, other: &Self) -> bool { -+ let m_gt = self.0.gt(other.0); -+ let m_eq = self.0.eq(other.0); -+ for i in 0..$id::lanes() { -+ if m_eq.extract(i) { -+ continue; -+ } -+ return m_gt.extract(i); -+ } -+ false -+ } -+ } -+ }; -+} -+ -+macro_rules! test_cmp_partial_ord_int { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _cmp_PartialOrd>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn partial_lex_ord() { -+ use crate::testing::utils::{test_cmp}; -+ // constant values -+ let a = $id::splat(0); -+ let b = $id::splat(1); -+ -+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Less)); -+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Greater)); -+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Equal)); -+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Equal)); -+ -+ // variable values: a = [0, 1, 2, 3]; b = [3, 2, 1, 0] -+ let mut a = $id::splat(0); -+ let mut b = $id::splat(0); -+ for i in 0..$id::lanes() { -+ a = a.replace(i, i as $elem_ty); -+ b = b.replace(i, ($id::lanes() - i) as $elem_ty); -+ } -+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Less)); -+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Greater)); -+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Equal)); -+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Equal)); -+ -+ // variable values: a = [0, 1, 2, 3]; b = [0, 1, 2, 4] -+ let mut b = a; -+ b = b.replace( -+ $id::lanes() - 1, -+ a.extract($id::lanes() - 1) + 1 as $elem_ty -+ ); -+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Less)); -+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Greater)); -+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Equal)); -+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Equal)); -+ -+ if $id::lanes() > 2 { -+ // variable values a = [0, 1, 0, 0]; b = [0, 1, 2, 3] -+ let b = a; -+ let mut a = $id::splat(0); -+ a = a.replace(1, 1 as 
$elem_ty); -+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Less)); -+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Greater)); -+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Equal)); -+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Equal)); -+ -+ // variable values: a = [0, 1, 2, 3]; b = [0, 1, 3, 2] -+ let mut b = a; -+ b = b.replace( -+ 2, a.extract($id::lanes() - 1) + 1 as $elem_ty -+ ); -+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Less)); -+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Greater)); -+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Equal)); -+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), -+ Some(crate::cmp::Ordering::Equal)); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -+ -+macro_rules! test_cmp_partial_ord_mask { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _cmp_PartialOrd>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn partial_lex_ord() { -+ use crate::testing::utils::{test_cmp}; -+ use crate::cmp::Ordering; -+ -+ // constant values -+ let a = $id::splat(false); -+ let b = $id::splat(true); -+ -+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), -+ Some(Ordering::Less)); -+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), -+ Some(Ordering::Greater)); -+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), -+ Some(Ordering::Equal)); -+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), -+ Some(Ordering::Equal)); -+ -+ // variable values: -+ // a = [false, false, false, false]; -+ // b = [false, false, false, true] -+ let a = $id::splat(false); -+ let mut b = $id::splat(false); -+ b = b.replace($id::lanes() - 1, true); -+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), -+ Some(Ordering::Less)); -+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), -+ Some(Ordering::Greater)); -+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), -+ Some(Ordering::Equal)); -+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), -+ Some(Ordering::Equal)); -+ -+ // variable values: -+ // a = [true, true, true, false]; -+ // b = [true, true, true, true] -+ let mut a = $id::splat(true); -+ let b = $id::splat(true); -+ a = a.replace($id::lanes() - 1, false); -+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), -+ Some(Ordering::Less)); -+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), -+ Some(Ordering::Greater)); -+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), -+ Some(Ordering::Equal)); -+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), -+ Some(Ordering::Equal)); -+ -+ if $id::lanes() > 2 { -+ // variable values -+ // a = [false, true, false, false]; -+ // b = [false, true, true, true] -+ let mut a = $id::splat(false); -+ let mut b = $id::splat(true); -+ a = a.replace(1, true); -+ b = b.replace(0, false); -+ 
test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), -+ Some(Ordering::Less)); -+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), -+ Some(Ordering::Greater)); -+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), -+ Some(Ordering::Equal)); -+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), -+ Some(Ordering::Equal)); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/cmp/vertical.rs b/third_party/rust/packed_simd/src/api/cmp/vertical.rs -new file mode 100644 -index 000000000000..ea4a0d1a3467 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/cmp/vertical.rs -@@ -0,0 +1,114 @@ -+//! Vertical (lane-wise) vector comparisons returning vector masks. -+ -+macro_rules! impl_cmp_vertical { -+ ( -+ [$elem_ty:ident; $elem_count:expr]: -+ $id:ident, -+ $mask_ty:ident, -+ $is_mask:expr,($true:expr, $false:expr) | $test_tt:tt -+ ) => { -+ impl $id { -+ /// Lane-wise equality comparison. -+ #[inline] -+ pub fn eq(self, other: Self) -> $mask_ty { -+ use crate::llvm::simd_eq; -+ Simd(unsafe { simd_eq(self.0, other.0) }) -+ } -+ -+ /// Lane-wise inequality comparison. -+ #[inline] -+ pub fn ne(self, other: Self) -> $mask_ty { -+ use crate::llvm::simd_ne; -+ Simd(unsafe { simd_ne(self.0, other.0) }) -+ } -+ -+ /// Lane-wise less-than comparison. -+ #[inline] -+ pub fn lt(self, other: Self) -> $mask_ty { -+ use crate::llvm::{simd_gt, simd_lt}; -+ if $is_mask { -+ Simd(unsafe { simd_gt(self.0, other.0) }) -+ } else { -+ Simd(unsafe { simd_lt(self.0, other.0) }) -+ } -+ } -+ -+ /// Lane-wise less-than-or-equals comparison. -+ #[inline] -+ pub fn le(self, other: Self) -> $mask_ty { -+ use crate::llvm::{simd_ge, simd_le}; -+ if $is_mask { -+ Simd(unsafe { simd_ge(self.0, other.0) }) -+ } else { -+ Simd(unsafe { simd_le(self.0, other.0) }) -+ } -+ } -+ -+ /// Lane-wise greater-than comparison. 
-+ #[inline] -+ pub fn gt(self, other: Self) -> $mask_ty { -+ use crate::llvm::{simd_gt, simd_lt}; -+ if $is_mask { -+ Simd(unsafe { simd_lt(self.0, other.0) }) -+ } else { -+ Simd(unsafe { simd_gt(self.0, other.0) }) -+ } -+ } -+ -+ /// Lane-wise greater-than-or-equals comparison. -+ #[inline] -+ pub fn ge(self, other: Self) -> $mask_ty { -+ use crate::llvm::{simd_ge, simd_le}; -+ if $is_mask { -+ Simd(unsafe { simd_le(self.0, other.0) }) -+ } else { -+ Simd(unsafe { simd_ge(self.0, other.0) }) -+ } -+ } -+ } -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _cmp_vertical>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn cmp() { -+ let a = $id::splat($false); -+ let b = $id::splat($true); -+ -+ let r = a.lt(b); -+ let e = $mask_ty::splat(true); -+ assert!(r == e); -+ let r = a.le(b); -+ assert!(r == e); -+ -+ let e = $mask_ty::splat(false); -+ let r = a.gt(b); -+ assert!(r == e); -+ let r = a.ge(b); -+ assert!(r == e); -+ let r = a.eq(b); -+ assert!(r == e); -+ -+ let mut a = a; -+ let mut b = b; -+ let mut e = e; -+ for i in 0..$id::lanes() { -+ if i % 2 == 0 { -+ a = a.replace(i, $false); -+ b = b.replace(i, $true); -+ e = e.replace(i, true); -+ } else { -+ a = a.replace(i, $true); -+ b = b.replace(i, $false); -+ e = e.replace(i, false); -+ } -+ } -+ let r = a.lt(b); -+ assert!(r == e); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/default.rs b/third_party/rust/packed_simd/src/api/default.rs -new file mode 100644 -index 000000000000..843d51bcc4bb ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/default.rs -@@ -0,0 +1,28 @@ -+//! Implements `Default` for vector types. -+ -+macro_rules! impl_default { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl Default for $id { -+ #[inline] -+ fn default() -> Self { -+ Self::splat($elem_ty::default()) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _default>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn default() { -+ let a = $id::default(); -+ for i in 0..$id::lanes() { -+ assert_eq!(a.extract(i), $elem_ty::default()); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/fmt.rs b/third_party/rust/packed_simd/src/api/fmt.rs -new file mode 100644 -index 000000000000..f3f55c401548 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/fmt.rs -@@ -0,0 +1,12 @@ -+//! Implements formatting APIs -+ -+#[macro_use] -+mod debug; -+#[macro_use] -+mod lower_hex; -+#[macro_use] -+mod upper_hex; -+#[macro_use] -+mod octal; -+#[macro_use] -+mod binary; -diff --git a/third_party/rust/packed_simd/src/api/fmt/binary.rs b/third_party/rust/packed_simd/src/api/fmt/binary.rs -new file mode 100644 -index 000000000000..b60769082d51 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/fmt/binary.rs -@@ -0,0 +1,56 @@ -+//! Implement Octal formatting -+ -+macro_rules! impl_fmt_binary { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl crate::fmt::Binary for $id { -+ #[allow(clippy::missing_inline_in_public_items)] -+ fn fmt( -+ &self, f: &mut crate::fmt::Formatter<'_>, -+ ) -> crate::fmt::Result { -+ write!(f, "{}(", stringify!($id))?; -+ for i in 0..$elem_count { -+ if i > 0 { -+ write!(f, ", ")?; -+ } -+ self.extract(i).fmt(f)?; -+ } -+ write!(f, ")") -+ } -+ } -+ test_if! { -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _fmt_binary>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn binary() { -+ use arrayvec::{ArrayString,ArrayVec}; -+ type TinyString = ArrayString<[u8; 512]>; -+ -+ use crate::fmt::Write; -+ let v = $id::splat($elem_ty::default()); -+ let mut s = TinyString::new(); -+ write!(&mut s, "{:#b}", v).unwrap(); -+ -+ let mut beg = TinyString::new(); -+ write!(&mut beg, "{}(", stringify!($id)).unwrap(); -+ assert!(s.starts_with(beg.as_str())); -+ assert!(s.ends_with(")")); -+ let s: ArrayVec<[TinyString; 64]> -+ = s.replace(beg.as_str(), "") -+ .replace(")", "").split(",") -+ .map(|v| TinyString::from(v.trim()).unwrap()) -+ .collect(); -+ assert_eq!(s.len(), $id::lanes()); -+ for (index, ss) in s.into_iter().enumerate() { -+ let mut e = TinyString::new(); -+ write!(&mut e, "{:#b}", v.extract(index)).unwrap(); -+ assert_eq!(ss, e); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/fmt/debug.rs b/third_party/rust/packed_simd/src/api/fmt/debug.rs -new file mode 100644 -index 000000000000..ad0b8a59a1f0 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/fmt/debug.rs -@@ -0,0 +1,62 @@ -+//! Implement debug formatting -+ -+macro_rules! impl_fmt_debug_tests { -+ ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ test_if! { -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _fmt_debug>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn debug() { -+ use arrayvec::{ArrayString,ArrayVec}; -+ type TinyString = ArrayString<[u8; 512]>; -+ -+ use crate::fmt::Write; -+ let v = $id::default(); -+ let mut s = TinyString::new(); -+ write!(&mut s, "{:?}", v).unwrap(); -+ -+ let mut beg = TinyString::new(); -+ write!(&mut beg, "{}(", stringify!($id)).unwrap(); -+ assert!(s.starts_with(beg.as_str())); -+ assert!(s.ends_with(")")); -+ let s: ArrayVec<[TinyString; 64]> -+ = s.replace(beg.as_str(), "") -+ .replace(")", "").split(",") -+ .map(|v| TinyString::from(v.trim()).unwrap()) -+ .collect(); -+ assert_eq!(s.len(), $id::lanes()); -+ for (index, ss) in s.into_iter().enumerate() { -+ let mut e = TinyString::new(); -+ write!(&mut e, "{:?}", v.extract(index)).unwrap(); -+ assert_eq!(ss, e); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -+ -+macro_rules! impl_fmt_debug { -+ ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl crate::fmt::Debug for $id { -+ #[allow(clippy::missing_inline_in_public_items)] -+ fn fmt( -+ &self, f: &mut crate::fmt::Formatter<'_>, -+ ) -> crate::fmt::Result { -+ write!(f, "{}(", stringify!($id))?; -+ for i in 0..$elem_count { -+ if i > 0 { -+ write!(f, ", ")?; -+ } -+ self.extract(i).fmt(f)?; -+ } -+ write!(f, ")") -+ } -+ } -+ impl_fmt_debug_tests!([$elem_ty; $elem_count]: $id | $test_tt); -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs -new file mode 100644 -index 000000000000..5a7aa14b5b8a ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs -@@ -0,0 +1,56 @@ -+//! Implement `LowerHex` formatting -+ -+macro_rules! 
impl_fmt_lower_hex { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl crate::fmt::LowerHex for $id { -+ #[allow(clippy::missing_inline_in_public_items)] -+ fn fmt( -+ &self, f: &mut crate::fmt::Formatter<'_>, -+ ) -> crate::fmt::Result { -+ write!(f, "{}(", stringify!($id))?; -+ for i in 0..$elem_count { -+ if i > 0 { -+ write!(f, ", ")?; -+ } -+ self.extract(i).fmt(f)?; -+ } -+ write!(f, ")") -+ } -+ } -+ test_if! { -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _fmt_lower_hex>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn lower_hex() { -+ use arrayvec::{ArrayString,ArrayVec}; -+ type TinyString = ArrayString<[u8; 512]>; -+ -+ use crate::fmt::Write; -+ let v = $id::splat($elem_ty::default()); -+ let mut s = TinyString::new(); -+ write!(&mut s, "{:#x}", v).unwrap(); -+ -+ let mut beg = TinyString::new(); -+ write!(&mut beg, "{}(", stringify!($id)).unwrap(); -+ assert!(s.starts_with(beg.as_str())); -+ assert!(s.ends_with(")")); -+ let s: ArrayVec<[TinyString; 64]> -+ = s.replace(beg.as_str(), "").replace(")", "") -+ .split(",") -+ .map(|v| TinyString::from(v.trim()).unwrap()) -+ .collect(); -+ assert_eq!(s.len(), $id::lanes()); -+ for (index, ss) in s.into_iter().enumerate() { -+ let mut e = TinyString::new(); -+ write!(&mut e, "{:#x}", v.extract(index)).unwrap(); -+ assert_eq!(ss, e); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/fmt/octal.rs b/third_party/rust/packed_simd/src/api/fmt/octal.rs -new file mode 100644 -index 000000000000..83ac8abc7dae ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/fmt/octal.rs -@@ -0,0 +1,56 @@ -+//! Implement Octal formatting -+ -+macro_rules! 
impl_fmt_octal { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl crate::fmt::Octal for $id { -+ #[allow(clippy::missing_inline_in_public_items)] -+ fn fmt( -+ &self, f: &mut crate::fmt::Formatter<'_>, -+ ) -> crate::fmt::Result { -+ write!(f, "{}(", stringify!($id))?; -+ for i in 0..$elem_count { -+ if i > 0 { -+ write!(f, ", ")?; -+ } -+ self.extract(i).fmt(f)?; -+ } -+ write!(f, ")") -+ } -+ } -+ test_if! { -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _fmt_octal>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn octal_hex() { -+ use arrayvec::{ArrayString,ArrayVec}; -+ type TinyString = ArrayString<[u8; 512]>; -+ -+ use crate::fmt::Write; -+ let v = $id::splat($elem_ty::default()); -+ let mut s = TinyString::new(); -+ write!(&mut s, "{:#o}", v).unwrap(); -+ -+ let mut beg = TinyString::new(); -+ write!(&mut beg, "{}(", stringify!($id)).unwrap(); -+ assert!(s.starts_with(beg.as_str())); -+ assert!(s.ends_with(")")); -+ let s: ArrayVec<[TinyString; 64]> -+ = s.replace(beg.as_str(), "").replace(")", "") -+ .split(",") -+ .map(|v| TinyString::from(v.trim()).unwrap()) -+ .collect(); -+ assert_eq!(s.len(), $id::lanes()); -+ for (index, ss) in s.into_iter().enumerate() { -+ let mut e = TinyString::new(); -+ write!(&mut e, "{:#o}", v.extract(index)).unwrap(); -+ assert_eq!(ss, e); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs -new file mode 100644 -index 000000000000..aa88f673abf0 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs -@@ -0,0 +1,56 @@ -+//! Implement `UpperHex` formatting -+ -+macro_rules! 
impl_fmt_upper_hex { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl crate::fmt::UpperHex for $id { -+ #[allow(clippy::missing_inline_in_public_items)] -+ fn fmt( -+ &self, f: &mut crate::fmt::Formatter<'_>, -+ ) -> crate::fmt::Result { -+ write!(f, "{}(", stringify!($id))?; -+ for i in 0..$elem_count { -+ if i > 0 { -+ write!(f, ", ")?; -+ } -+ self.extract(i).fmt(f)?; -+ } -+ write!(f, ")") -+ } -+ } -+ test_if! { -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _fmt_upper_hex>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn upper_hex() { -+ use arrayvec::{ArrayString,ArrayVec}; -+ type TinyString = ArrayString<[u8; 512]>; -+ -+ use crate::fmt::Write; -+ let v = $id::splat($elem_ty::default()); -+ let mut s = TinyString::new(); -+ write!(&mut s, "{:#X}", v).unwrap(); -+ -+ let mut beg = TinyString::new(); -+ write!(&mut beg, "{}(", stringify!($id)).unwrap(); -+ assert!(s.starts_with(beg.as_str())); -+ assert!(s.ends_with(")")); -+ let s: ArrayVec<[TinyString; 64]> -+ = s.replace(beg.as_str(), "").replace(")", "") -+ .split(",") -+ .map(|v| TinyString::from(v.trim()).unwrap()) -+ .collect(); -+ assert_eq!(s.len(), $id::lanes()); -+ for (index, ss) in s.into_iter().enumerate() { -+ let mut e = TinyString::new(); -+ write!(&mut e, "{:#X}", v.extract(index)).unwrap(); -+ assert_eq!(ss, e); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/from.rs b/third_party/rust/packed_simd/src/api/from.rs -new file mode 100644 -index 000000000000..c30c4d6e216d ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/from.rs -@@ -0,0 +1,7 @@ -+//! 
Implementations of the `From` and `Into` traits -+ -+#[macro_use] -+mod from_array; -+ -+#[macro_use] -+mod from_vector; -diff --git a/third_party/rust/packed_simd/src/api/from/from_array.rs b/third_party/rust/packed_simd/src/api/from/from_array.rs -new file mode 100644 -index 000000000000..964d1501df6a ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/from/from_array.rs -@@ -0,0 +1,121 @@ -+//! Implements `From<[T; N]>` and `Into<[T; N]>` for vector types. -+ -+macro_rules! impl_from_array { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt -+ | ($non_default_array:expr, $non_default_vec:expr)) => { -+ impl From<[$elem_ty; $elem_count]> for $id { -+ #[inline] -+ fn from(array: [$elem_ty; $elem_count]) -> Self { -+ union U { -+ array: [$elem_ty; $elem_count], -+ vec: $id, -+ } -+ unsafe { U { array }.vec } -+ } -+ } -+ -+ impl From<$id> for [$elem_ty; $elem_count] { -+ #[inline] -+ fn from(vec: $id) -> Self { -+ union U { -+ array: [$elem_ty; $elem_count], -+ vec: $id, -+ } -+ unsafe { U { vec }.array } -+ } -+ } -+ -+ // FIXME: `Into::into` is not inline, but due to -+ // the blanket impl in `std`, which is not -+ // marked `default`, we cannot override it here with -+ // specialization. -+ /* -+ impl Into<[$elem_ty; $elem_count]> for $id { -+ #[inline] -+ fn into(self) -> [$elem_ty; $elem_count] { -+ union U { -+ array: [$elem_ty; $elem_count], -+ vec: $id, -+ } -+ unsafe { U { vec: self }.array } -+ } -+ } -+ -+ impl Into<$id> for [$elem_ty; $elem_count] { -+ #[inline] -+ fn into(self) -> $id { -+ union U { -+ array: [$elem_ty; $elem_count], -+ vec: $id, -+ } -+ unsafe { U { array: self }.vec } -+ } -+ } -+ */ -+ -+ test_if! { -+ $test_tt: -+ paste::item! { -+ mod [<$id _from>] { -+ use super::*; -+ #[test] -+ fn array() { -+ let vec: $id = Default::default(); -+ -+ // FIXME: Workaround for arrays with more than 32 -+ // elements. -+ // -+ // Safe because we never take a reference to any -+ // uninitialized element. 
-+ union W { -+ array: [$elem_ty; $elem_count], -+ other: () -+ } -+ let mut array = W { other: () }; -+ for i in 0..$elem_count { -+ let default: $elem_ty = Default::default(); -+ // note: array.other is the active member and -+ // initialized so we can take a reference to it: -+ let p = unsafe { -+ &mut array.other as *mut () as *mut $elem_ty -+ }; -+ // note: default is a valid bit-pattern for -+ // $elem_ty: -+ unsafe { -+ crate::ptr::write(p.wrapping_add(i), default) -+ }; -+ } -+ // note: the array variant of the union is properly -+ // initialized: -+ let mut array = unsafe { -+ array.array -+ }; -+ -+ array[0] = $non_default_array; -+ let vec = vec.replace(0, $non_default_vec); -+ -+ let vec_from_array = $id::from(array); -+ assert_eq!(vec_from_array, vec); -+ let array_from_vec -+ = <[$elem_ty; $elem_count]>::from(vec); -+ // FIXME: Workaround for arrays with more than 32 -+ // elements. -+ for i in 0..$elem_count { -+ assert_eq!(array_from_vec[i], array[i]); -+ } -+ -+ let vec_from_into_array: $id = array.into(); -+ assert_eq!(vec_from_into_array, vec); -+ let array_from_into_vec: [$elem_ty; $elem_count] -+ = vec.into(); -+ // FIXME: Workaround for arrays with more than 32 -+ // elements. -+ for i in 0..$elem_count { -+ assert_eq!(array_from_into_vec[i], array[i]); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/from/from_vector.rs b/third_party/rust/packed_simd/src/api/from/from_vector.rs -new file mode 100644 -index 000000000000..55f70016d51d ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/from/from_vector.rs -@@ -0,0 +1,67 @@ -+//! Implements `From` and `Into` for vector types. -+ -+macro_rules! 
impl_from_vector { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt -+ | $source:ident) => { -+ impl From<$source> for $id { -+ #[inline] -+ fn from(source: $source) -> Self { -+ fn static_assert_same_number_of_lanes() -+ where -+ T: crate::sealed::Simd, -+ U: crate::sealed::Simd, -+ { -+ } -+ use crate::llvm::simd_cast; -+ static_assert_same_number_of_lanes::<$id, $source>(); -+ Simd(unsafe { simd_cast(source.0) }) -+ } -+ } -+ -+ // FIXME: `Into::into` is not inline, but due to the blanket impl in -+ // `std`, which is not marked `default`, we cannot override it here -+ // with specialization. -+ -+ /* -+ impl Into<$id> for $source { -+ #[inline] -+ fn into(self) -> $id { -+ unsafe { simd_cast(self) } -+ } -+ } -+ */ -+ -+ test_if! { -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _from_ $source>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn from() { -+ assert_eq!($id::lanes(), $source::lanes()); -+ let source: $source = Default::default(); -+ let vec: $id = Default::default(); -+ -+ let e = $id::from(source); -+ assert_eq!(e, vec); -+ -+ let e: $id = source.into(); -+ assert_eq!(e, vec); -+ } -+ } -+ } -+ } -+ }; -+} -+ -+macro_rules! impl_from_vectors { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt -+ | $($source:ident),*) => { -+ $( -+ impl_from_vector!( -+ [$elem_ty; $elem_count]: $id | $test_tt | $source -+ ); -+ )* -+ } -+} -diff --git a/third_party/rust/packed_simd/src/api/hash.rs b/third_party/rust/packed_simd/src/api/hash.rs -new file mode 100644 -index 000000000000..08d42496ea8b ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/hash.rs -@@ -0,0 +1,47 @@ -+//! Implements `Hash` for vector types. -+ -+macro_rules! 
impl_hash { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl crate::hash::Hash for $id { -+ #[inline] -+ fn hash(&self, state: &mut H) { -+ unsafe { -+ union A { -+ data: [$elem_ty; $id::lanes()], -+ vec: $id, -+ } -+ A { vec: *self }.data.hash(state) -+ } -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _hash>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn hash() { -+ use crate::hash::{Hash, Hasher}; -+ #[allow(deprecated)] -+ use crate::hash::{SipHasher13}; -+ type A = [$elem_ty; $id::lanes()]; -+ let a: A = [42 as $elem_ty; $id::lanes()]; -+ assert_eq!( -+ crate::mem::size_of::(), -+ crate::mem::size_of::<$id>() -+ ); -+ #[allow(deprecated)] -+ let mut a_hash = SipHasher13::new(); -+ let mut v_hash = a_hash.clone(); -+ a.hash(&mut a_hash); -+ -+ let v = $id::splat(42 as $elem_ty); -+ v.hash(&mut v_hash); -+ assert_eq!(a_hash.finish(), v_hash.finish()); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/into_bits.rs b/third_party/rust/packed_simd/src/api/into_bits.rs -new file mode 100644 -index 000000000000..f2cc1bae5397 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/into_bits.rs -@@ -0,0 +1,59 @@ -+//! Implementation of `FromBits` and `IntoBits`. -+ -+/// Safe lossless bitwise conversion from `T` to `Self`. -+pub trait FromBits: crate::marker::Sized { -+ /// Safe lossless bitwise transmute from `T` to `Self`. -+ fn from_bits(t: T) -> Self; -+} -+ -+/// Safe lossless bitwise conversion from `Self` to `T`. -+pub trait IntoBits: crate::marker::Sized { -+ /// Safe lossless bitwise transmute from `self` to `T`. -+ fn into_bits(self) -> T; -+} -+ -+/// `FromBits` implies `IntoBits`. 
-+impl IntoBits for T -+where -+ U: FromBits, -+{ -+ #[inline] -+ fn into_bits(self) -> U { -+ debug_assert!( -+ crate::mem::size_of::() == crate::mem::size_of::() -+ ); -+ U::from_bits(self) -+ } -+} -+ -+/// `FromBits` and `IntoBits` are reflexive -+impl FromBits for T { -+ #[inline] -+ fn from_bits(t: Self) -> Self { -+ t -+ } -+} -+ -+#[macro_use] -+mod macros; -+ -+mod v16; -+pub use self::v16::*; -+ -+mod v32; -+pub use self::v32::*; -+ -+mod v64; -+pub use self::v64::*; -+ -+mod v128; -+pub use self::v128::*; -+ -+mod v256; -+pub use self::v256::*; -+ -+mod v512; -+pub use self::v512::*; -+ -+mod arch_specific; -+pub use self::arch_specific::*; -diff --git a/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs -new file mode 100644 -index 000000000000..6cc2fa37b728 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs -@@ -0,0 +1,190 @@ -+//! `FromBits` and `IntoBits` between portable vector types and the -+//! architecture-specific vector types. -+#![rustfmt::skip] -+ -+// FIXME: MIPS FromBits/IntoBits -+ -+#[allow(unused)] -+use crate::*; -+ -+/// This macro implements FromBits for the portable and the architecture -+/// specific vector types. -+/// -+/// The "leaf" case is at the bottom, and the most generic case is at the top. -+/// The generic case is split into smaller cases recursively. -+macro_rules! 
impl_arch { -+ ([$arch_head_i:ident[$arch_head_tt:tt]: $($arch_head_ty:ident),*], -+ $([$arch_tail_i:ident[$arch_tail_tt:tt]: $($arch_tail_ty:ident),*]),* | -+ from: $($from_ty:ident),* | into: $($into_ty:ident),* | -+ test: $test_tt:tt) => { -+ impl_arch!( -+ [$arch_head_i[$arch_head_tt]: $($arch_head_ty),*] | -+ from: $($from_ty),* | -+ into: $($into_ty),* | -+ test: $test_tt -+ ); -+ impl_arch!( -+ $([$arch_tail_i[$arch_tail_tt]: $($arch_tail_ty),*]),* | -+ from: $($from_ty),* | -+ into: $($into_ty),* | -+ test: $test_tt -+ ); -+ }; -+ ([$arch:ident[$arch_tt:tt]: $($arch_ty:ident),*] | -+ from: $($from_ty:ident),* | into: $($into_ty:ident),* | -+ test: $test_tt:tt) => { -+ // note: if target is "arm", "+v7,+neon" must be enabled -+ // and the std library must be recompiled with them -+ #[cfg(any( -+ not(target_arch = "arm"), -+ all(target_feature = "v7", target_feature = "neon", -+ any(feature = "core_arch", libcore_neon))) -+ )] -+ // note: if target is "powerpc", "altivec" must be enabled -+ // and the std library must be recompiled with it -+ #[cfg(any( -+ not(target_arch = "powerpc"), -+ all(target_feature = "altivec", feature = "core_arch"), -+ ))] -+ #[cfg(target_arch = $arch_tt)] -+ use crate::arch::$arch::{ -+ $($arch_ty),* -+ }; -+ -+ #[cfg(any( -+ not(target_arch = "arm"), -+ all(target_feature = "v7", target_feature = "neon", -+ any(feature = "core_arch", libcore_neon))) -+ )] -+ #[cfg(any( -+ not(target_arch = "powerpc"), -+ all(target_feature = "altivec", feature = "core_arch"), -+ ))] -+ #[cfg(target_arch = $arch_tt)] -+ impl_arch!($($arch_ty),* | $($from_ty),* | $($into_ty),* | -+ test: $test_tt); -+ }; -+ ($arch_head:ident, $($arch_tail:ident),* | $($from_ty:ident),* -+ | $($into_ty:ident),* | test: $test_tt:tt) => { -+ impl_arch!($arch_head | $($from_ty),* | $($into_ty),* | -+ test: $test_tt); -+ impl_arch!($($arch_tail),* | $($from_ty),* | $($into_ty),* | -+ test: $test_tt); -+ }; -+ ($arch_head:ident | $($from_ty:ident),* | $($into_ty:ident),* 
| -+ test: $test_tt:tt) => { -+ impl_from_bits!($arch_head[$test_tt]: $($from_ty),*); -+ impl_into_bits!($arch_head[$test_tt]: $($into_ty),*); -+ }; -+} -+ -+//////////////////////////////////////////////////////////////////////////////// -+// Implementations for the 64-bit wide vector types: -+ -+// FIXME: 64-bit single element types -+// FIXME: arm/aarch float16x4_t missing -+impl_arch!( -+ [x86["x86"]: __m64], [x86_64["x86_64"]: __m64], -+ [arm["arm"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t, -+ poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, -+ uint64x1_t], -+ [aarch64["aarch64"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t, -+ poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, uint64x1_t, -+ float64x1_t] | -+ from: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2 | -+ into: i8x8, u8x8, i16x4, u16x4, i32x2, u32x2, f32x2 | -+ test: test_v64 -+); -+ -+//////////////////////////////////////////////////////////////////////////////// -+// Implementations for the 128-bit wide vector types: -+ -+// FIXME: arm/aarch float16x8_t missing -+// FIXME: ppc vector_pixel missing -+// FIXME: ppc64 vector_Float16 missing -+// FIXME: ppc64 vector_signed_long_long missing -+// FIXME: ppc64 vector_unsigned_long_long missing -+// FIXME: ppc64 vector_bool_long_long missing -+// FIXME: ppc64 vector_signed___int128 missing -+// FIXME: ppc64 vector_unsigned___int128 missing -+impl_arch!( -+ [x86["x86"]: __m128, __m128i, __m128d], -+ [x86_64["x86_64"]: __m128, __m128i, __m128d], -+ [arm["arm"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, uint16x8_t, -+ poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, uint64x2_t], -+ [aarch64["aarch64"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, -+ uint16x8_t, poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, -+ uint64x2_t, float64x2_t], -+ [powerpc["powerpc"]: vector_signed_char, vector_unsigned_char, -+ vector_signed_short, vector_unsigned_short, vector_signed_int, -+ 
vector_unsigned_int, vector_float], -+ [powerpc64["powerpc64"]: vector_signed_char, vector_unsigned_char, -+ vector_signed_short, vector_unsigned_short, vector_signed_int, -+ vector_unsigned_int, vector_float, vector_signed_long, -+ vector_unsigned_long, vector_double] | -+ from: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, -+ i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1 | -+ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, i64x2, u64x2, f64x2, -+ i128x1, u128x1 | -+ test: test_v128 -+); -+ -+impl_arch!( -+ [powerpc["powerpc"]: vector_bool_char], -+ [powerpc64["powerpc64"]: vector_bool_char] | -+ from: m8x16, m16x8, m32x4, m64x2, m128x1 | -+ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, -+ i64x2, u64x2, f64x2, i128x1, u128x1, -+ // Masks: -+ m8x16 | -+ test: test_v128 -+); -+ -+impl_arch!( -+ [powerpc["powerpc"]: vector_bool_short], -+ [powerpc64["powerpc64"]: vector_bool_short] | -+ from: m16x8, m32x4, m64x2, m128x1 | -+ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, -+ i64x2, u64x2, f64x2, i128x1, u128x1, -+ // Masks: -+ m8x16, m16x8 | -+ test: test_v128 -+); -+ -+impl_arch!( -+ [powerpc["powerpc"]: vector_bool_int], -+ [powerpc64["powerpc64"]: vector_bool_int] | -+ from: m32x4, m64x2, m128x1 | -+ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, -+ i64x2, u64x2, f64x2, i128x1, u128x1, -+ // Masks: -+ m8x16, m16x8, m32x4 | -+ test: test_v128 -+); -+ -+impl_arch!( -+ [powerpc64["powerpc64"]: vector_bool_long] | -+ from: m64x2, m128x1 | -+ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, -+ i64x2, u64x2, f64x2, i128x1, u128x1, -+ // Masks: -+ m8x16, m16x8, m32x4, m64x2 | -+ test: test_v128 -+); -+ -+//////////////////////////////////////////////////////////////////////////////// -+// Implementations for the 256-bit wide vector types -+ -+impl_arch!( -+ [x86["x86"]: __m256, __m256i, __m256d], -+ [x86_64["x86_64"]: __m256, __m256i, __m256d] | -+ from: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, -+ 
i32x8, u32x8, f32x8, m32x8, -+ i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2 | -+ into: i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, f32x8, -+ i64x4, u64x4, f64x4, i128x2, u128x2 | -+ test: test_v256 -+); -+ -+//////////////////////////////////////////////////////////////////////////////// -+// FIXME: Implementations for the 512-bit wide vector types -diff --git a/third_party/rust/packed_simd/src/api/into_bits/macros.rs b/third_party/rust/packed_simd/src/api/into_bits/macros.rs -new file mode 100644 -index 000000000000..8cec5b00479f ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/into_bits/macros.rs -@@ -0,0 +1,74 @@ -+//! Macros implementing `FromBits` -+ -+macro_rules! impl_from_bits_ { -+ ($id:ident[$test_tt:tt]: $from_ty:ident) => { -+ impl crate::api::into_bits::FromBits<$from_ty> for $id { -+ #[inline] -+ fn from_bits(x: $from_ty) -> Self { -+ unsafe { crate::mem::transmute(x) } -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _from_bits_ $from_ty>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn test() { -+ use crate::{ -+ ptr::{read_unaligned}, -+ mem::{size_of, zeroed} -+ }; -+ use crate::IntoBits; -+ assert_eq!(size_of::<$id>(), -+ size_of::<$from_ty>()); -+ // This is safe becasue we never create a reference to -+ // uninitialized memory: -+ let a: $from_ty = unsafe { zeroed() }; -+ -+ let b_0: $id = crate::FromBits::from_bits(a); -+ let b_1: $id = a.into_bits(); -+ -+ // Check that these are byte-wise equal, that is, -+ // that the bit patterns are identical: -+ for i in 0..size_of::<$id>() { -+ // This is safe because we only read initialized -+ // memory in bounds. Also, taking a reference to -+ // `b_i` is ok because the fields are initialized. 
-+ unsafe { -+ let b_0_v: u8 = read_unaligned( -+ (&b_0 as *const $id as *const u8) -+ .wrapping_add(i) -+ ); -+ let b_1_v: u8 = read_unaligned( -+ (&b_1 as *const $id as *const u8) -+ .wrapping_add(i) -+ ); -+ assert_eq!(b_0_v, b_1_v); -+ } -+ } -+ } -+ } -+ } -+ } -+ }; -+} -+ -+macro_rules! impl_from_bits { -+ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { -+ $( -+ impl_from_bits_!($id[$test_tt]: $from_ty); -+ )* -+ } -+} -+ -+#[allow(unused)] -+macro_rules! impl_into_bits { -+ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { -+ $( -+ impl_from_bits_!($from_ty[$test_tt]: $id); -+ )* -+ } -+} -diff --git a/third_party/rust/packed_simd/src/api/into_bits/v128.rs b/third_party/rust/packed_simd/src/api/into_bits/v128.rs -new file mode 100644 -index 000000000000..804dbf282d53 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/into_bits/v128.rs -@@ -0,0 +1,28 @@ -+//! `FromBits` and `IntoBits` implementations for portable 128-bit wide vectors -+#![rustfmt::skip] -+ -+#[allow(unused)] // wasm_bindgen_test -+use crate::*; -+ -+impl_from_bits!(i8x16[test_v128]: u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); -+impl_from_bits!(u8x16[test_v128]: i8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); -+impl_from_bits!(m8x16[test_v128]: m16x8, m32x4, m64x2, m128x1); -+ -+impl_from_bits!(i16x8[test_v128]: i8x16, u8x16, m8x16, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); -+impl_from_bits!(u16x8[test_v128]: i8x16, u8x16, m8x16, i16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); -+impl_from_bits!(m16x8[test_v128]: m32x4, m64x2, m128x1); -+ -+impl_from_bits!(i32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); -+impl_from_bits!(u32x4[test_v128]: i8x16, u8x16, m8x16, 
i16x8, u16x8, m16x8, i32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); -+impl_from_bits!(f32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); -+impl_from_bits!(m32x4[test_v128]: m64x2, m128x1); -+ -+impl_from_bits!(i64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); -+impl_from_bits!(u64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, f64x2, m64x2, i128x1, u128x1, m128x1); -+impl_from_bits!(f64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, m64x2, i128x1, u128x1, m128x1); -+impl_from_bits!(m64x2[test_v128]: m128x1); -+ -+impl_from_bits!(i128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, u128x1, m128x1); -+impl_from_bits!(u128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, m128x1); -+// note: m128x1 cannot be constructed from all the other masks bit patterns in here -+ -diff --git a/third_party/rust/packed_simd/src/api/into_bits/v16.rs b/third_party/rust/packed_simd/src/api/into_bits/v16.rs -new file mode 100644 -index 000000000000..1162a62e5bd1 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/into_bits/v16.rs -@@ -0,0 +1,9 @@ -+//! 
`FromBits` and `IntoBits` implementations for portable 16-bit wide vectors -+#![rustfmt::skip] -+ -+#[allow(unused)] // wasm_bindgen_test -+use crate::*; -+ -+impl_from_bits!(i8x2[test_v16]: u8x2, m8x2); -+impl_from_bits!(u8x2[test_v16]: i8x2, m8x2); -+// note: m8x2 cannot be constructed from all i8x2 or u8x2 bit patterns -diff --git a/third_party/rust/packed_simd/src/api/into_bits/v256.rs b/third_party/rust/packed_simd/src/api/into_bits/v256.rs -new file mode 100644 -index 000000000000..cc7a6646b535 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/into_bits/v256.rs -@@ -0,0 +1,27 @@ -+//! `FromBits` and `IntoBits` implementations for portable 256-bit wide vectors -+#![rustfmt::skip] -+ -+#[allow(unused)] // wasm_bindgen_test -+use crate::*; -+ -+impl_from_bits!(i8x32[test_v256]: u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); -+impl_from_bits!(u8x32[test_v256]: i8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); -+impl_from_bits!(m8x32[test_v256]: m16x16, m32x8, m64x4, m128x2); -+ -+impl_from_bits!(i16x16[test_v256]: i8x32, u8x32, m8x32, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); -+impl_from_bits!(u16x16[test_v256]: i8x32, u8x32, m8x32, i16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); -+impl_from_bits!(m16x16[test_v256]: m32x8, m64x4, m128x2); -+ -+impl_from_bits!(i32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); -+impl_from_bits!(u32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); -+impl_from_bits!(f32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); 
-+impl_from_bits!(m32x8[test_v256]: m64x4, m128x2); -+ -+impl_from_bits!(i64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); -+impl_from_bits!(u64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, f64x4, m64x4, i128x2, u128x2, m128x2); -+impl_from_bits!(f64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, m64x4, i128x2, u128x2, m128x2); -+impl_from_bits!(m64x4[test_v256]: m128x2); -+ -+impl_from_bits!(i128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, u128x2, m128x2); -+impl_from_bits!(u128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, m128x2); -+// note: m128x2 cannot be constructed from all the other masks bit patterns in here -diff --git a/third_party/rust/packed_simd/src/api/into_bits/v32.rs b/third_party/rust/packed_simd/src/api/into_bits/v32.rs -new file mode 100644 -index 000000000000..2c183ecf1c77 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/into_bits/v32.rs -@@ -0,0 +1,13 @@ -+//! 
`FromBits` and `IntoBits` implementations for portable 32-bit wide vectors -+#![rustfmt::skip] -+ -+#[allow(unused)] // wasm_bindgen_test -+use crate::*; -+ -+impl_from_bits!(i8x4[test_v32]: u8x4, m8x4, i16x2, u16x2, m16x2); -+impl_from_bits!(u8x4[test_v32]: i8x4, m8x4, i16x2, u16x2, m16x2); -+impl_from_bits!(m8x4[test_v32]: m16x2); -+ -+impl_from_bits!(i16x2[test_v32]: i8x4, u8x4, m8x4, u16x2, m16x2); -+impl_from_bits!(u16x2[test_v32]: i8x4, u8x4, m8x4, i16x2, m16x2); -+// note: m16x2 cannot be constructed from all m8x4 bit patterns -diff --git a/third_party/rust/packed_simd/src/api/into_bits/v512.rs b/third_party/rust/packed_simd/src/api/into_bits/v512.rs -new file mode 100644 -index 000000000000..8dec6a7f63a0 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/into_bits/v512.rs -@@ -0,0 +1,27 @@ -+//! `FromBits` and `IntoBits` implementations for portable 512-bit wide vectors -+#![rustfmt::skip] -+ -+#[allow(unused)] // wasm_bindgen_test -+use crate::*; -+ -+impl_from_bits!(i8x64[test_v512]: u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); -+impl_from_bits!(u8x64[test_v512]: i8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); -+impl_from_bits!(m8x64[test_v512]: m16x32, m32x16, m64x8, m128x4); -+ -+impl_from_bits!(i16x32[test_v512]: i8x64, u8x64, m8x64, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); -+impl_from_bits!(u16x32[test_v512]: i8x64, u8x64, m8x64, i16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); -+impl_from_bits!(m16x32[test_v512]: m32x16, m64x8, m128x4); -+ -+impl_from_bits!(i32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); -+impl_from_bits!(u32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, 
i32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); -+impl_from_bits!(f32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); -+impl_from_bits!(m32x16[test_v512]: m64x8, m128x4); -+ -+impl_from_bits!(i64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); -+impl_from_bits!(u64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, f64x8, m64x8, i128x4, u128x4, m128x4); -+impl_from_bits!(f64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, m64x8, i128x4, u128x4, m128x4); -+impl_from_bits!(m64x8[test_v512]: m128x4); -+ -+impl_from_bits!(i128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, u128x4, m128x4); -+impl_from_bits!(u128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, m128x4); -+// note: m128x4 cannot be constructed from all the other masks bit patterns in here -diff --git a/third_party/rust/packed_simd/src/api/into_bits/v64.rs b/third_party/rust/packed_simd/src/api/into_bits/v64.rs -new file mode 100644 -index 000000000000..8999d98e13f8 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/into_bits/v64.rs -@@ -0,0 +1,18 @@ -+//! 
`FromBits` and `IntoBits` implementations for portable 64-bit wide vectors -+#![rustfmt::skip] -+ -+#[allow(unused)] // wasm_bindgen_test -+use crate::*; -+ -+impl_from_bits!(i8x8[test_v64]: u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); -+impl_from_bits!(u8x8[test_v64]: i8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); -+impl_from_bits!(m8x8[test_v64]: m16x4, m32x2); -+ -+impl_from_bits!(i16x4[test_v64]: i8x8, u8x8, m8x8, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); -+impl_from_bits!(u16x4[test_v64]: i8x8, u8x8, m8x8, i16x4, m16x4, i32x2, u32x2, f32x2, m32x2); -+impl_from_bits!(m16x4[test_v64]: m32x2); -+ -+impl_from_bits!(i32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, u32x2, f32x2, m32x2); -+impl_from_bits!(u32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, f32x2, m32x2); -+impl_from_bits!(f32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, m32x2); -+// note: m32x2 cannot be constructed from all m16x4 or m8x8 bit patterns -diff --git a/third_party/rust/packed_simd/src/api/math.rs b/third_party/rust/packed_simd/src/api/math.rs -new file mode 100644 -index 000000000000..e7a8d256baf5 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math.rs -@@ -0,0 +1,4 @@ -+//! Implements vertical math operations -+ -+#[macro_use] -+mod float; -diff --git a/third_party/rust/packed_simd/src/api/math/float.rs b/third_party/rust/packed_simd/src/api/math/float.rs -new file mode 100644 -index 000000000000..c0ec46e91789 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float.rs -@@ -0,0 +1,61 @@ -+//! Implements vertical floating-point math operations. 
-+ -+#[macro_use] -+mod abs; -+ -+#[macro_use] -+mod consts; -+ -+#[macro_use] -+mod cos; -+ -+#[macro_use] -+mod exp; -+ -+#[macro_use] -+mod powf; -+ -+#[macro_use] -+mod ln; -+ -+#[macro_use] -+mod mul_add; -+ -+#[macro_use] -+mod mul_adde; -+ -+#[macro_use] -+mod recpre; -+ -+#[macro_use] -+mod rsqrte; -+ -+#[macro_use] -+mod sin; -+ -+#[macro_use] -+mod sqrt; -+ -+#[macro_use] -+mod sqrte; -+ -+macro_rules! impl_float_category { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident, $mask_ty:ident) => { -+ impl $id { -+ #[inline] -+ pub fn is_nan(self) -> $mask_ty { -+ self.ne(self) -+ } -+ -+ #[inline] -+ pub fn is_infinite(self) -> $mask_ty { -+ self.eq(Self::INFINITY) | self.eq(Self::NEG_INFINITY) -+ } -+ -+ #[inline] -+ pub fn is_finite(self) -> $mask_ty { -+ !(self.is_nan() | self.is_infinite()) -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/math/float/abs.rs b/third_party/rust/packed_simd/src/api/math/float/abs.rs -new file mode 100644 -index 000000000000..1865bdb68ec6 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float/abs.rs -@@ -0,0 +1,31 @@ -+//! Implements vertical (lane-wise) floating-point `abs`. -+ -+macro_rules! impl_math_float_abs { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Absolute value. -+ #[inline] -+ pub fn abs(self) -> Self { -+ use crate::codegen::math::float::abs::Abs; -+ Abs::abs(self) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _math_abs>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn abs() { -+ let o = $id::splat(1 as $elem_ty); -+ assert_eq!(o, o.abs()); -+ -+ let mo = $id::splat(-1 as $elem_ty); -+ assert_eq!(o, mo.abs()); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/math/float/consts.rs b/third_party/rust/packed_simd/src/api/math/float/consts.rs -new file mode 100644 -index 000000000000..89f93a6d692b ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float/consts.rs -@@ -0,0 +1,86 @@ -+macro_rules! impl_float_consts { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident) => { -+ impl $id { -+ /// Machine epsilon value. -+ pub const EPSILON: $id = $id::splat(core::$elem_ty::EPSILON); -+ -+ /// Smallest finite value. -+ pub const MIN: $id = $id::splat(core::$elem_ty::MIN); -+ -+ /// Smallest positive normal value. -+ pub const MIN_POSITIVE: $id = -+ $id::splat(core::$elem_ty::MIN_POSITIVE); -+ -+ /// Largest finite value. -+ pub const MAX: $id = $id::splat(core::$elem_ty::MAX); -+ -+ /// Not a Number (NaN). -+ pub const NAN: $id = $id::splat(core::$elem_ty::NAN); -+ -+ /// Infinity (∞). -+ pub const INFINITY: $id = $id::splat(core::$elem_ty::INFINITY); -+ -+ /// Negative infinity (-∞). 
-+ pub const NEG_INFINITY: $id = -+ $id::splat(core::$elem_ty::NEG_INFINITY); -+ -+ /// Archimedes' constant (π) -+ pub const PI: $id = $id::splat(core::$elem_ty::consts::PI); -+ -+ /// π/2 -+ pub const FRAC_PI_2: $id = -+ $id::splat(core::$elem_ty::consts::FRAC_PI_2); -+ -+ /// π/3 -+ pub const FRAC_PI_3: $id = -+ $id::splat(core::$elem_ty::consts::FRAC_PI_3); -+ -+ /// π/4 -+ pub const FRAC_PI_4: $id = -+ $id::splat(core::$elem_ty::consts::FRAC_PI_4); -+ -+ /// π/6 -+ pub const FRAC_PI_6: $id = -+ $id::splat(core::$elem_ty::consts::FRAC_PI_6); -+ -+ /// π/8 -+ pub const FRAC_PI_8: $id = -+ $id::splat(core::$elem_ty::consts::FRAC_PI_8); -+ -+ /// 1/π -+ pub const FRAC_1_PI: $id = -+ $id::splat(core::$elem_ty::consts::FRAC_1_PI); -+ -+ /// 2/π -+ pub const FRAC_2_PI: $id = -+ $id::splat(core::$elem_ty::consts::FRAC_2_PI); -+ -+ /// 2/sqrt(π) -+ pub const FRAC_2_SQRT_PI: $id = -+ $id::splat(core::$elem_ty::consts::FRAC_2_SQRT_PI); -+ -+ /// sqrt(2) -+ pub const SQRT_2: $id = $id::splat(core::$elem_ty::consts::SQRT_2); -+ -+ /// 1/sqrt(2) -+ pub const FRAC_1_SQRT_2: $id = -+ $id::splat(core::$elem_ty::consts::FRAC_1_SQRT_2); -+ -+ /// Euler's number (e) -+ pub const E: $id = $id::splat(core::$elem_ty::consts::E); -+ -+ /// log2(e) -+ pub const LOG2_E: $id = $id::splat(core::$elem_ty::consts::LOG2_E); -+ -+ /// log10(e) -+ pub const LOG10_E: $id = -+ $id::splat(core::$elem_ty::consts::LOG10_E); -+ -+ /// ln(2) -+ pub const LN_2: $id = $id::splat(core::$elem_ty::consts::LN_2); -+ -+ /// ln(10) -+ pub const LN_10: $id = $id::splat(core::$elem_ty::consts::LN_10); -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/math/float/cos.rs b/third_party/rust/packed_simd/src/api/math/float/cos.rs -new file mode 100644 -index 000000000000..e5b8f46036c7 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float/cos.rs -@@ -0,0 +1,44 @@ -+//! Implements vertical (lane-wise) floating-point `cos`. -+ -+macro_rules! 
impl_math_float_cos { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Cosine. -+ #[inline] -+ pub fn cos(self) -> Self { -+ use crate::codegen::math::float::cos::Cos; -+ Cos::cos(self) -+ } -+ -+ /// Cosine of `self * PI`. -+ #[inline] -+ pub fn cos_pi(self) -> Self { -+ use crate::codegen::math::float::cos_pi::CosPi; -+ CosPi::cos_pi(self) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _math_cos>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn cos() { -+ use crate::$elem_ty::consts::PI; -+ let z = $id::splat(0 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ let p = $id::splat(PI as $elem_ty); -+ let ph = $id::splat(PI as $elem_ty / 2.); -+ let z_r = $id::splat((PI as $elem_ty / 2.).cos()); -+ let o_r = $id::splat((PI as $elem_ty).cos()); -+ -+ assert_eq!(o, z.cos()); -+ assert_eq!(z_r, ph.cos()); -+ assert_eq!(o_r, p.cos()); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/math/float/exp.rs b/third_party/rust/packed_simd/src/api/math/float/exp.rs -new file mode 100644 -index 000000000000..e3356d853a83 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float/exp.rs -@@ -0,0 +1,33 @@ -+//! Implements vertical (lane-wise) floating-point `exp`. -+ -+macro_rules! impl_math_float_exp { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Returns the exponential function of `self`: `e^(self)`. -+ #[inline] -+ pub fn exp(self) -> Self { -+ use crate::codegen::math::float::exp::Exp; -+ Exp::exp(self) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _math_exp>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn exp() { -+ let z = $id::splat(0 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ assert_eq!(o, z.exp()); -+ -+ let e = $id::splat(crate::f64::consts::E as $elem_ty); -+ let tol = $id::splat(2.4e-4 as $elem_ty); -+ assert!((e - o.exp()).abs().le(tol).all()); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/math/float/ln.rs b/third_party/rust/packed_simd/src/api/math/float/ln.rs -new file mode 100644 -index 000000000000..5ceb9173ae05 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float/ln.rs -@@ -0,0 +1,33 @@ -+//! Implements vertical (lane-wise) floating-point `ln`. -+ -+macro_rules! impl_math_float_ln { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Returns the natural logarithm of `self`. -+ #[inline] -+ pub fn ln(self) -> Self { -+ use crate::codegen::math::float::ln::Ln; -+ Ln::ln(self) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _math_ln>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn ln() { -+ let z = $id::splat(0 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ assert_eq!(z, o.ln()); -+ -+ let e = $id::splat(crate::f64::consts::E as $elem_ty); -+ let tol = $id::splat(2.4e-4 as $elem_ty); -+ assert!((o - e.ln()).abs().le(tol).all()); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_add.rs b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs -new file mode 100644 -index 000000000000..4b170ee2b755 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs -@@ -0,0 +1,44 @@ -+//! Implements vertical (lane-wise) floating-point `mul_add`. -+ -+macro_rules! 
impl_math_float_mul_add { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Fused multiply add: `self * y + z` -+ #[inline] -+ pub fn mul_add(self, y: Self, z: Self) -> Self { -+ use crate::codegen::math::float::mul_add::MulAdd; -+ MulAdd::mul_add(self, y, z) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _math_mul_add>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn mul_add() { -+ let z = $id::splat(0 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ let t = $id::splat(2 as $elem_ty); -+ let t3 = $id::splat(3 as $elem_ty); -+ let f = $id::splat(4 as $elem_ty); -+ -+ assert_eq!(z, z.mul_add(z, z)); -+ assert_eq!(o, o.mul_add(o, z)); -+ assert_eq!(o, o.mul_add(z, o)); -+ assert_eq!(o, z.mul_add(o, o)); -+ -+ assert_eq!(t, o.mul_add(o, o)); -+ assert_eq!(t, o.mul_add(t, z)); -+ assert_eq!(t, t.mul_add(o, z)); -+ -+ assert_eq!(f, t.mul_add(t, z)); -+ assert_eq!(f, t.mul_add(o, t)); -+ assert_eq!(t3, t.mul_add(o, o)); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs -new file mode 100644 -index 000000000000..c5b27110f2d7 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs -@@ -0,0 +1,48 @@ -+//! Implements vertical (lane-wise) floating-point `mul_adde`. -+ -+macro_rules! impl_math_float_mul_adde { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Fused multiply add estimate: ~= `self * y + z` -+ /// -+ /// While fused multiply-add (`fma`) has infinite precision, -+ /// `mul_adde` has _at worst_ the same precision of a multiply followed by an add. -+ /// This might be more efficient on architectures that do not have an `fma` instruction. 
-+ #[inline] -+ pub fn mul_adde(self, y: Self, z: Self) -> Self { -+ use crate::codegen::math::float::mul_adde::MulAddE; -+ MulAddE::mul_adde(self, y, z) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _math_mul_adde>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn mul_adde() { -+ let z = $id::splat(0 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ let t = $id::splat(2 as $elem_ty); -+ let t3 = $id::splat(3 as $elem_ty); -+ let f = $id::splat(4 as $elem_ty); -+ -+ assert_eq!(z, z.mul_adde(z, z)); -+ assert_eq!(o, o.mul_adde(o, z)); -+ assert_eq!(o, o.mul_adde(z, o)); -+ assert_eq!(o, z.mul_adde(o, o)); -+ -+ assert_eq!(t, o.mul_adde(o, o)); -+ assert_eq!(t, o.mul_adde(t, z)); -+ assert_eq!(t, t.mul_adde(o, z)); -+ -+ assert_eq!(f, t.mul_adde(t, z)); -+ assert_eq!(f, t.mul_adde(o, t)); -+ assert_eq!(t3, t.mul_adde(o, o)); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/math/float/powf.rs b/third_party/rust/packed_simd/src/api/math/float/powf.rs -new file mode 100644 -index 000000000000..83dc9ff9c05e ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float/powf.rs -@@ -0,0 +1,36 @@ -+//! Implements vertical (lane-wise) floating-point `powf`. -+ -+macro_rules! impl_math_float_powf { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Raises `self` number to the floating point power of `x`. -+ #[inline] -+ pub fn powf(self, x: Self) -> Self { -+ use crate::codegen::math::float::powf::Powf; -+ Powf::powf(self, x) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _math_powf>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn powf() { -+ let z = $id::splat(0 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ let t = $id::splat(2 as $elem_ty); -+ assert_eq!(o, o.powf(z)); -+ assert_eq!(o, t.powf(z)); -+ assert_eq!(o, o.powf(o)); -+ assert_eq!(t, t.powf(o)); -+ -+ let f = $id::splat(4 as $elem_ty); -+ assert_eq!(f, t.powf(t)); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/math/float/recpre.rs b/third_party/rust/packed_simd/src/api/math/float/recpre.rs -new file mode 100644 -index 000000000000..127f0b2ff674 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float/recpre.rs -@@ -0,0 +1,36 @@ -+//! Implements vertical (lane-wise) floating-point `recpre`. -+ -+macro_rules! impl_math_float_recpre { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Reciprocal estimate: `~= 1. / self`. -+ /// -+ /// FIXME: The precision of the estimate is currently unspecified. -+ #[inline] -+ pub fn recpre(self) -> Self { -+ $id::splat(1.) / self -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _math_recpre>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn recpre() { -+ let tol = $id::splat(2.4e-4 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ let error = (o - o.recpre()).abs(); -+ assert!(error.le(tol).all()); -+ -+ let t = $id::splat(2 as $elem_ty); -+ let e = 0.5; -+ let error = (e - t.recpre()).abs(); -+ assert!(error.le(tol).all()); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs -new file mode 100644 -index 000000000000..c77977f7b1cd ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs -@@ -0,0 +1,40 @@ -+//! Implements vertical (lane-wise) floating-point `rsqrte`. -+ -+macro_rules! impl_math_float_rsqrte { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Reciprocal square-root estimate: `~= 1. / self.sqrt()`. -+ /// -+ /// FIXME: The precision of the estimate is currently unspecified. -+ #[inline] -+ pub fn rsqrte(self) -> Self { -+ unsafe { -+ use crate::llvm::simd_fsqrt; -+ $id::splat(1.) / Simd(simd_fsqrt(self.0)) -+ } -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _math_rsqrte>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn rsqrte() { -+ use crate::$elem_ty::consts::SQRT_2; -+ let tol = $id::splat(2.4e-4 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ let error = (o - o.rsqrte()).abs(); -+ assert!(error.le(tol).all()); -+ -+ let t = $id::splat(2 as $elem_ty); -+ let e = 1. 
/ SQRT_2; -+ let error = (e - t.rsqrte()).abs(); -+ assert!(error.le(tol).all()); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/math/float/sin.rs b/third_party/rust/packed_simd/src/api/math/float/sin.rs -new file mode 100644 -index 000000000000..49908319b126 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float/sin.rs -@@ -0,0 +1,50 @@ -+//! Implements vertical (lane-wise) floating-point `sin`. -+ -+macro_rules! impl_math_float_sin { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Sine. -+ #[inline] -+ pub fn sin(self) -> Self { -+ use crate::codegen::math::float::sin::Sin; -+ Sin::sin(self) -+ } -+ -+ /// Sine of `self * PI`. -+ #[inline] -+ pub fn sin_pi(self) -> Self { -+ use crate::codegen::math::float::sin_pi::SinPi; -+ SinPi::sin_pi(self) -+ } -+ -+ /// Sine and cosine of `self * PI`. -+ #[inline] -+ pub fn sin_cos_pi(self) -> (Self, Self) { -+ use crate::codegen::math::float::sin_cos_pi::SinCosPi; -+ SinCosPi::sin_cos_pi(self) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _math_sin>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn sin() { -+ use crate::$elem_ty::consts::PI; -+ let z = $id::splat(0 as $elem_ty); -+ let p = $id::splat(PI as $elem_ty); -+ let ph = $id::splat(PI as $elem_ty / 2.); -+ let o_r = $id::splat((PI as $elem_ty / 2.).sin()); -+ let z_r = $id::splat((PI as $elem_ty).sin()); -+ -+ assert_eq!(z, z.sin()); -+ assert_eq!(o_r, ph.sin()); -+ assert_eq!(z_r, p.sin()); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrt.rs b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs -new file mode 100644 -index 000000000000..ae624122d0e2 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs -@@ -0,0 +1,35 @@ -+//! Implements vertical (lane-wise) floating-point `sqrt`. 
-+ -+macro_rules! impl_math_float_sqrt { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ #[inline] -+ pub fn sqrt(self) -> Self { -+ use crate::codegen::math::float::sqrt::Sqrt; -+ Sqrt::sqrt(self) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _math_sqrt>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn sqrt() { -+ use crate::$elem_ty::consts::SQRT_2; -+ let z = $id::splat(0 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ assert_eq!(z, z.sqrt()); -+ assert_eq!(o, o.sqrt()); -+ -+ let t = $id::splat(2 as $elem_ty); -+ let e = $id::splat(SQRT_2); -+ assert_eq!(e, t.sqrt()); -+ -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrte.rs b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs -new file mode 100644 -index 000000000000..f7ffad748d9c ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs -@@ -0,0 +1,44 @@ -+//! Implements vertical (lane-wise) floating-point `sqrte`. -+ -+macro_rules! impl_math_float_sqrte { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Square-root estimate. -+ /// -+ /// FIXME: The precision of the estimate is currently unspecified. -+ #[inline] -+ pub fn sqrte(self) -> Self { -+ use crate::codegen::math::float::sqrte::Sqrte; -+ Sqrte::sqrte(self) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _math_sqrte>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn sqrte() { -+ use crate::$elem_ty::consts::SQRT_2; -+ let tol = $id::splat(2.4e-4 as $elem_ty); -+ -+ let z = $id::splat(0 as $elem_ty); -+ let error = (z - z.sqrte()).abs(); -+ assert!(error.le(tol).all()); -+ -+ let o = $id::splat(1 as $elem_ty); -+ let error = (o - o.sqrte()).abs(); -+ assert!(error.le(tol).all()); -+ -+ let t = $id::splat(2 as $elem_ty); -+ let e = $id::splat(SQRT_2 as $elem_ty); -+ let error = (e - t.sqrte()).abs(); -+ -+ assert!(error.le(tol).all()); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/minimal.rs b/third_party/rust/packed_simd/src/api/minimal.rs -new file mode 100644 -index 000000000000..840d9e32585d ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/minimal.rs -@@ -0,0 +1,6 @@ -+#[macro_use] -+mod iuf; -+#[macro_use] -+mod mask; -+#[macro_use] -+mod ptr; -diff --git a/third_party/rust/packed_simd/src/api/minimal/iuf.rs b/third_party/rust/packed_simd/src/api/minimal/iuf.rs -new file mode 100644 -index 000000000000..58ffabab994f ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/minimal/iuf.rs -@@ -0,0 +1,167 @@ -+//! Minimal API of signed integer, unsigned integer, and floating-point -+//! vectors. -+ -+macro_rules! impl_minimal_iuf { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident | -+ $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => { -+ -+ $(#[$doc])* -+ pub type $id = Simd<[$elem_ty; $elem_count]>; -+ -+ impl sealed::Simd for $id { -+ type Element = $elem_ty; -+ const LANES: usize = $elem_count; -+ type LanesType = [u32; $elem_count]; -+ } -+ -+ impl $id { -+ /// Creates a new instance with each vector elements initialized -+ /// with the provided values. 
-+ #[inline] -+ #[allow(clippy::too_many_arguments)] -+ pub const fn new($($elem_name: $elem_ty),*) -> Self { -+ Simd(codegen::$id($($elem_name as $ielem_ty),*)) -+ } -+ -+ /// Returns the number of vector lanes. -+ #[inline] -+ pub const fn lanes() -> usize { -+ $elem_count -+ } -+ -+ /// Constructs a new instance with each element initialized to -+ /// `value`. -+ #[inline] -+ pub const fn splat(value: $elem_ty) -> Self { -+ Simd(codegen::$id($({ -+ #[allow(non_camel_case_types, dead_code)] -+ struct $elem_name; -+ value as $ielem_ty -+ }),*)) -+ } -+ -+ /// Extracts the value at `index`. -+ /// -+ /// # Panics -+ /// -+ /// If `index >= Self::lanes()`. -+ #[inline] -+ pub fn extract(self, index: usize) -> $elem_ty { -+ assert!(index < $elem_count); -+ unsafe { self.extract_unchecked(index) } -+ } -+ -+ /// Extracts the value at `index`. -+ /// -+ /// # Precondition -+ /// -+ /// If `index >= Self::lanes()` the behavior is undefined. -+ #[inline] -+ pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty { -+ use crate::llvm::simd_extract; -+ let e: $ielem_ty = simd_extract(self.0, index as u32); -+ e as $elem_ty -+ } -+ -+ /// Returns a new vector where the value at `index` is replaced by `new_value`. -+ /// -+ /// # Panics -+ /// -+ /// If `index >= Self::lanes()`. -+ #[inline] -+ #[must_use = "replace does not modify the original value - \ -+ it returns a new vector with the value at `index` \ -+ replaced by `new_value`d" -+ ] -+ pub fn replace(self, index: usize, new_value: $elem_ty) -> Self { -+ assert!(index < $elem_count); -+ unsafe { self.replace_unchecked(index, new_value) } -+ } -+ -+ /// Returns a new vector where the value at `index` is replaced by `new_value`. -+ /// -+ /// # Precondition -+ /// -+ /// If `index >= Self::lanes()` the behavior is undefined. 
-+ #[inline] -+ #[must_use = "replace_unchecked does not modify the original value - \ -+ it returns a new vector with the value at `index` \ -+ replaced by `new_value`d" -+ ] -+ pub unsafe fn replace_unchecked( -+ self, -+ index: usize, -+ new_value: $elem_ty, -+ ) -> Self { -+ use crate::llvm::simd_insert; -+ Simd(simd_insert(self.0, index as u32, new_value as $ielem_ty)) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _minimal>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn minimal() { -+ // lanes: -+ assert_eq!($elem_count, $id::lanes()); -+ -+ // splat and extract / extract_unchecked: -+ const VAL: $elem_ty = 7 as $elem_ty; -+ const VEC: $id = $id::splat(VAL); -+ for i in 0..$id::lanes() { -+ assert_eq!(VAL, VEC.extract(i)); -+ assert_eq!( -+ VAL, unsafe { VEC.extract_unchecked(i) } -+ ); -+ } -+ -+ // replace / replace_unchecked -+ let new_vec = VEC.replace(0, 42 as $elem_ty); -+ for i in 0..$id::lanes() { -+ if i == 0 { -+ assert_eq!(42 as $elem_ty, new_vec.extract(i)); -+ } else { -+ assert_eq!(VAL, new_vec.extract(i)); -+ } -+ } -+ let new_vec = unsafe { -+ VEC.replace_unchecked(0, 42 as $elem_ty) -+ }; -+ for i in 0..$id::lanes() { -+ if i == 0 { -+ assert_eq!(42 as $elem_ty, new_vec.extract(i)); -+ } else { -+ assert_eq!(VAL, new_vec.extract(i)); -+ } -+ } -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn extract_panic_oob() { -+ const VAL: $elem_ty = 7 as $elem_ty; -+ const VEC: $id = $id::splat(VAL); -+ let _ = VEC.extract($id::lanes()); -+ } -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = 
"wasm32"))] -+ #[test] -+ #[should_panic] -+ fn replace_panic_oob() { -+ const VAL: $elem_ty = 7 as $elem_ty; -+ const VEC: $id = $id::splat(VAL); -+ let _ = VEC.replace($id::lanes(), 42 as $elem_ty); -+ } -+ } -+ } -+ } -+ } -+} -diff --git a/third_party/rust/packed_simd/src/api/minimal/mask.rs b/third_party/rust/packed_simd/src/api/minimal/mask.rs -new file mode 100644 -index 000000000000..e65be95db12c ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/minimal/mask.rs -@@ -0,0 +1,174 @@ -+//! Minimal API of mask vectors. -+ -+macro_rules! impl_minimal_mask { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident -+ | $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => { -+ $(#[$doc])* -+ pub type $id = Simd<[$elem_ty; $elem_count]>; -+ -+ impl sealed::Simd for $id { -+ type Element = $elem_ty; -+ const LANES: usize = $elem_count; -+ type LanesType = [u32; $elem_count]; -+ } -+ -+ impl $id { -+ /// Creates a new instance with each vector elements initialized -+ /// with the provided values. -+ #[inline] -+ #[allow(clippy::too_many_arguments)] -+ pub const fn new($($elem_name: bool),*) -> Self { -+ Simd(codegen::$id($(Self::bool_to_internal($elem_name)),*)) -+ } -+ -+ /// Converts a boolean type into the type of the vector lanes. -+ #[inline] -+ #[allow(clippy::indexing_slicing)] -+ const fn bool_to_internal(x: bool) -> $ielem_ty { -+ [0 as $ielem_ty, !(0 as $ielem_ty)][x as usize] -+ } -+ -+ /// Returns the number of vector lanes. -+ #[inline] -+ pub const fn lanes() -> usize { -+ $elem_count -+ } -+ -+ /// Constructs a new instance with each element initialized to -+ /// `value`. -+ #[inline] -+ pub const fn splat(value: bool) -> Self { -+ Simd(codegen::$id($({ -+ #[allow(non_camel_case_types, dead_code)] -+ struct $elem_name; -+ Self::bool_to_internal(value) -+ }),*)) -+ } -+ -+ /// Extracts the value at `index`. -+ /// -+ /// # Panics -+ /// -+ /// If `index >= Self::lanes()`. 
-+ #[inline] -+ pub fn extract(self, index: usize) -> bool { -+ assert!(index < $elem_count); -+ unsafe { self.extract_unchecked(index) } -+ } -+ -+ /// Extracts the value at `index`. -+ /// -+ /// If `index >= Self::lanes()` the behavior is undefined. -+ #[inline] -+ pub unsafe fn extract_unchecked(self, index: usize) -> bool { -+ use crate::llvm::simd_extract; -+ let x: $ielem_ty = simd_extract(self.0, index as u32); -+ x != 0 -+ } -+ -+ /// Returns a new vector where the value at `index` is replaced by -+ /// `new_value`. -+ /// -+ /// # Panics -+ /// -+ /// If `index >= Self::lanes()`. -+ #[inline] -+ #[must_use = "replace does not modify the original value - \ -+ it returns a new vector with the value at `index` \ -+ replaced by `new_value`d" -+ ] -+ pub fn replace(self, index: usize, new_value: bool) -> Self { -+ assert!(index < $elem_count); -+ unsafe { self.replace_unchecked(index, new_value) } -+ } -+ -+ /// Returns a new vector where the value at `index` is replaced by -+ /// `new_value`. -+ /// -+ /// # Panics -+ /// -+ /// If `index >= Self::lanes()`. -+ #[inline] -+ #[must_use = "replace_unchecked does not modify the original value - \ -+ it returns a new vector with the value at `index` \ -+ replaced by `new_value`d" -+ ] -+ pub unsafe fn replace_unchecked( -+ self, -+ index: usize, -+ new_value: bool, -+ ) -> Self { -+ use crate::llvm::simd_insert; -+ Simd(simd_insert(self.0, index as u32, -+ Self::bool_to_internal(new_value))) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _minimal>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn minimal() { -+ // TODO: test new -+ -+ // lanes: -+ assert_eq!($elem_count, $id::lanes()); -+ -+ // splat and extract / extract_unchecked: -+ let vec = $id::splat(true); -+ for i in 0..$id::lanes() { -+ assert_eq!(true, vec.extract(i)); -+ assert_eq!(true, -+ unsafe { vec.extract_unchecked(i) } -+ ); -+ } -+ -+ // replace / replace_unchecked -+ let new_vec = vec.replace(0, false); -+ for i in 0..$id::lanes() { -+ if i == 0 { -+ assert_eq!(false, new_vec.extract(i)); -+ } else { -+ assert_eq!(true, new_vec.extract(i)); -+ } -+ } -+ let new_vec = unsafe { -+ vec.replace_unchecked(0, false) -+ }; -+ for i in 0..$id::lanes() { -+ if i == 0 { -+ assert_eq!(false, new_vec.extract(i)); -+ } else { -+ assert_eq!(true, new_vec.extract(i)); -+ } -+ } -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn extract_panic_oob() { -+ let vec = $id::splat(false); -+ let _ = vec.extract($id::lanes()); -+ } -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn replace_panic_oob() { -+ let vec = $id::splat(false); -+ let _ = vec.replace($id::lanes(), true); -+ } -+ } -+ } -+ } -+ } -+} -diff --git a/third_party/rust/packed_simd/src/api/minimal/ptr.rs b/third_party/rust/packed_simd/src/api/minimal/ptr.rs -new file mode 100644 -index 000000000000..75e5aad5c065 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/minimal/ptr.rs -@@ -0,0 +1,1385 @@ -+//! Minimal API of pointer vectors. -+ -+macro_rules! 
impl_minimal_p { -+ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident, -+ $usize_ty:ident, $isize_ty:ident | $ref:ident | $test_tt:tt -+ | $($elem_name:ident),+ | ($true:expr, $false:expr) | -+ $(#[$doc:meta])*) => { -+ -+ $(#[$doc])* -+ pub type $id = Simd<[$elem_ty; $elem_count]>; -+ -+ impl sealed::Simd for $id { -+ type Element = $elem_ty; -+ const LANES: usize = $elem_count; -+ type LanesType = [u32; $elem_count]; -+ } -+ -+ impl $id { -+ /// Creates a new instance with each vector elements initialized -+ /// with the provided values. -+ #[inline] -+ #[allow(clippy::too_many_arguments)] -+ pub const fn new($($elem_name: $elem_ty),*) -> Self { -+ Simd(codegen::$id($($elem_name),*)) -+ } -+ -+ /// Returns the number of vector lanes. -+ #[inline] -+ pub const fn lanes() -> usize { -+ $elem_count -+ } -+ -+ /// Constructs a new instance with each element initialized to -+ /// `value`. -+ #[inline] -+ pub const fn splat(value: $elem_ty) -> Self { -+ Simd(codegen::$id($({ -+ #[allow(non_camel_case_types, dead_code)] -+ struct $elem_name; -+ value -+ }),*)) -+ } -+ -+ /// Constructs a new instance with each element initialized to -+ /// `null`. -+ #[inline] -+ pub const fn null() -> Self { -+ Self::splat(crate::ptr::null_mut() as $elem_ty) -+ } -+ -+ /// Returns a mask that selects those lanes that contain `null` -+ /// pointers. -+ #[inline] -+ pub fn is_null(self) -> $mask_ty { -+ self.eq(Self::null()) -+ } -+ -+ /// Extracts the value at `index`. -+ /// -+ /// # Panics -+ /// -+ /// If `index >= Self::lanes()`. -+ #[inline] -+ pub fn extract(self, index: usize) -> $elem_ty { -+ assert!(index < $elem_count); -+ unsafe { self.extract_unchecked(index) } -+ } -+ -+ /// Extracts the value at `index`. -+ /// -+ /// # Precondition -+ /// -+ /// If `index >= Self::lanes()` the behavior is undefined. 
-+ #[inline] -+ pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty { -+ use crate::llvm::simd_extract; -+ simd_extract(self.0, index as u32) -+ } -+ -+ /// Returns a new vector where the value at `index` is replaced by -+ /// `new_value`. -+ /// -+ /// # Panics -+ /// -+ /// If `index >= Self::lanes()`. -+ #[inline] -+ #[must_use = "replace does not modify the original value - \ -+ it returns a new vector with the value at `index` \ -+ replaced by `new_value`d" -+ ] -+ #[allow(clippy::not_unsafe_ptr_arg_deref)] -+ pub fn replace(self, index: usize, new_value: $elem_ty) -> Self { -+ assert!(index < $elem_count); -+ unsafe { self.replace_unchecked(index, new_value) } -+ } -+ -+ /// Returns a new vector where the value at `index` is replaced by `new_value`. -+ /// -+ /// # Precondition -+ /// -+ /// If `index >= Self::lanes()` the behavior is undefined. -+ #[inline] -+ #[must_use = "replace_unchecked does not modify the original value - \ -+ it returns a new vector with the value at `index` \ -+ replaced by `new_value`d" -+ ] -+ pub unsafe fn replace_unchecked( -+ self, -+ index: usize, -+ new_value: $elem_ty, -+ ) -> Self { -+ use crate::llvm::simd_insert; -+ Simd(simd_insert(self.0, index as u32, new_value)) -+ } -+ } -+ -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _minimal>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn minimal() { -+ // lanes: -+ assert_eq!($elem_count, $id::::lanes()); -+ -+ // splat and extract / extract_unchecked: -+ let VAL7: <$id as sealed::Simd>::Element -+ = $ref!(7); -+ let VAL42: <$id as sealed::Simd>::Element -+ = $ref!(42); -+ let VEC: $id = $id::splat(VAL7); -+ for i in 0..$id::::lanes() { -+ assert_eq!(VAL7, VEC.extract(i)); -+ assert_eq!( -+ VAL7, unsafe { VEC.extract_unchecked(i) } -+ ); -+ } -+ -+ // replace / replace_unchecked -+ let new_vec = VEC.replace(0, VAL42); -+ for i in 0..$id::::lanes() { -+ if i == 0 { -+ assert_eq!(VAL42, new_vec.extract(i)); -+ } else { -+ assert_eq!(VAL7, new_vec.extract(i)); -+ } -+ } -+ let new_vec = unsafe { -+ VEC.replace_unchecked(0, VAL42) -+ }; -+ for i in 0..$id::::lanes() { -+ if i == 0 { -+ assert_eq!(VAL42, new_vec.extract(i)); -+ } else { -+ assert_eq!(VAL7, new_vec.extract(i)); -+ } -+ } -+ -+ let mut n = $id::::null(); -+ assert_eq!( -+ n, -+ $id::::splat(unsafe { crate::mem::zeroed() }) -+ ); -+ assert!(n.is_null().all()); -+ n = n.replace( -+ 0, unsafe { crate::mem::transmute(1_isize) } -+ ); -+ assert!(!n.is_null().all()); -+ if $id::::lanes() > 1 { -+ assert!(n.is_null().any()); -+ } else { -+ assert!(!n.is_null().any()); -+ } -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn extract_panic_oob() { -+ let VAL: <$id as sealed::Simd>::Element -+ = $ref!(7); -+ let VEC: $id = $id::splat(VAL); -+ let _ = VEC.extract($id::::lanes()); -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ 
#[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn replace_panic_oob() { -+ let VAL: <$id as sealed::Simd>::Element -+ = $ref!(7); -+ let VAL42: <$id as sealed::Simd>::Element -+ = $ref!(42); -+ let VEC: $id = $id::splat(VAL); -+ let _ = VEC.replace($id::::lanes(), VAL42); -+ } -+ } -+ } -+ } -+ -+ impl crate::fmt::Debug for $id { -+ #[allow(clippy::missing_inline_in_public_items)] -+ fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -+ -> crate::fmt::Result { -+ write!( -+ f, -+ "{}<{}>(", -+ stringify!($id), -+ unsafe { crate::intrinsics::type_name::() } -+ )?; -+ for i in 0..$elem_count { -+ if i > 0 { -+ write!(f, ", ")?; -+ } -+ self.extract(i).fmt(f)?; -+ } -+ write!(f, ")") -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _fmt_debug>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn debug() { -+ use arrayvec::{ArrayString,ArrayVec}; -+ type TinyString = ArrayString<[u8; 512]>; -+ -+ use crate::fmt::Write; -+ let v = $id::::default(); -+ let mut s = TinyString::new(); -+ write!(&mut s, "{:?}", v).unwrap(); -+ -+ let mut beg = TinyString::new(); -+ write!(&mut beg, "{}(", stringify!($id)).unwrap(); -+ assert!( -+ s.starts_with(beg.as_str()), -+ "s = {} (should start with = {})", s, beg -+ ); -+ assert!(s.ends_with(")")); -+ let s: ArrayVec<[TinyString; 64]> -+ = s.replace(beg.as_str(), "") -+ .replace(")", "").split(",") -+ .map(|v| TinyString::from(v.trim()).unwrap()) -+ .collect(); -+ assert_eq!(s.len(), $id::::lanes()); -+ for (index, ss) in s.into_iter().enumerate() { -+ let mut e = TinyString::new(); -+ write!(&mut e, "{:?}", v.extract(index)).unwrap(); -+ assert_eq!(ss, e); -+ } -+ } -+ } -+ } -+ } -+ -+ impl Default for $id { -+ #[inline] -+ fn default() -> Self { -+ // FIXME: ptrs do not implement default -+ Self::null() -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _default>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn default() { -+ let a = $id::::default(); -+ for i in 0..$id::::lanes() { -+ assert_eq!( -+ a.extract(i), unsafe { crate::mem::zeroed() } -+ ); -+ } -+ } -+ } -+ } -+ } -+ -+ impl $id { -+ /// Lane-wise equality comparison. -+ #[inline] -+ pub fn eq(self, other: Self) -> $mask_ty { -+ unsafe { -+ use crate::llvm::simd_eq; -+ let a: $usize_ty = crate::mem::transmute(self); -+ let b: $usize_ty = crate::mem::transmute(other); -+ Simd(simd_eq(a.0, b.0)) -+ } -+ } -+ -+ /// Lane-wise inequality comparison. -+ #[inline] -+ pub fn ne(self, other: Self) -> $mask_ty { -+ unsafe { -+ use crate::llvm::simd_ne; -+ let a: $usize_ty = crate::mem::transmute(self); -+ let b: $usize_ty = crate::mem::transmute(other); -+ Simd(simd_ne(a.0, b.0)) -+ } -+ } -+ -+ /// Lane-wise less-than comparison. -+ #[inline] -+ pub fn lt(self, other: Self) -> $mask_ty { -+ unsafe { -+ use crate::llvm::simd_lt; -+ let a: $usize_ty = crate::mem::transmute(self); -+ let b: $usize_ty = crate::mem::transmute(other); -+ Simd(simd_lt(a.0, b.0)) -+ } -+ } -+ -+ /// Lane-wise less-than-or-equals comparison. -+ #[inline] -+ pub fn le(self, other: Self) -> $mask_ty { -+ unsafe { -+ use crate::llvm::simd_le; -+ let a: $usize_ty = crate::mem::transmute(self); -+ let b: $usize_ty = crate::mem::transmute(other); -+ Simd(simd_le(a.0, b.0)) -+ } -+ } -+ -+ /// Lane-wise greater-than comparison. -+ #[inline] -+ pub fn gt(self, other: Self) -> $mask_ty { -+ unsafe { -+ use crate::llvm::simd_gt; -+ let a: $usize_ty = crate::mem::transmute(self); -+ let b: $usize_ty = crate::mem::transmute(other); -+ Simd(simd_gt(a.0, b.0)) -+ } -+ } -+ -+ /// Lane-wise greater-than-or-equals comparison. 
-+ #[inline] -+ pub fn ge(self, other: Self) -> $mask_ty { -+ unsafe { -+ use crate::llvm::simd_ge; -+ let a: $usize_ty = crate::mem::transmute(self); -+ let b: $usize_ty = crate::mem::transmute(other); -+ Simd(simd_ge(a.0, b.0)) -+ } -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _cmp_vertical>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn cmp() { -+ let a = $id::::null(); -+ let b = $id::::splat(unsafe { -+ crate::mem::transmute(1_isize) -+ }); -+ -+ let r = a.lt(b); -+ let e = $mask_ty::splat(true); -+ assert!(r == e); -+ let r = a.le(b); -+ assert!(r == e); -+ -+ let e = $mask_ty::splat(false); -+ let r = a.gt(b); -+ assert!(r == e); -+ let r = a.ge(b); -+ assert!(r == e); -+ let r = a.eq(b); -+ assert!(r == e); -+ -+ let mut a = a; -+ let mut b = b; -+ let mut e = e; -+ for i in 0..$id::::lanes() { -+ if i % 2 == 0 { -+ a = a.replace( -+ i, -+ unsafe { crate::mem::transmute(0_isize) } -+ ); -+ b = b.replace( -+ i, -+ unsafe { crate::mem::transmute(1_isize) } -+ ); -+ e = e.replace(i, true); -+ } else { -+ a = a.replace( -+ i, -+ unsafe { crate::mem::transmute(1_isize) } -+ ); -+ b = b.replace( -+ i, -+ unsafe { crate::mem::transmute(0_isize) } -+ ); -+ e = e.replace(i, false); -+ } -+ } -+ let r = a.lt(b); -+ assert!(r == e); -+ } -+ } -+ } -+ } -+ -+ #[allow(clippy::partialeq_ne_impl)] -+ impl crate::cmp::PartialEq<$id> for $id { -+ #[inline] -+ fn eq(&self, other: &Self) -> bool { -+ $id::::eq(*self, *other).all() -+ } -+ #[inline] -+ fn ne(&self, other: &Self) -> bool { -+ $id::::ne(*self, *other).any() -+ } -+ } -+ -+ // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 -+ #[allow(clippy::partialeq_ne_impl)] -+ impl crate::cmp::PartialEq>> -+ for LexicographicallyOrdered<$id> -+ { -+ #[inline] -+ fn eq(&self, other: &Self) -> bool { -+ self.0 == other.0 -+ } -+ #[inline] -+ fn ne(&self, other: &Self) -> bool { -+ self.0 != 
other.0 -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _cmp_PartialEq>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn partial_eq() { -+ let a = $id::::null(); -+ let b = $id::::splat(unsafe { -+ crate::mem::transmute(1_isize) -+ }); -+ -+ assert!(a != b); -+ assert!(!(a == b)); -+ assert!(a == a); -+ assert!(!(a != a)); -+ -+ if $id::::lanes() > 1 { -+ let a = $id::::null().replace(0, unsafe { -+ crate::mem::transmute(1_isize) -+ }); -+ let b = $id::::splat(unsafe { -+ crate::mem::transmute(1_isize) -+ }); -+ -+ assert!(a != b); -+ assert!(!(a == b)); -+ assert!(a == a); -+ assert!(!(a != a)); -+ } -+ } -+ } -+ } -+ } -+ -+ impl crate::cmp::Eq for $id {} -+ impl crate::cmp::Eq for LexicographicallyOrdered<$id> {} -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _cmp_eq>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn eq() { -+ fn foo(_: E) {} -+ let a = $id::::null(); -+ foo(a); -+ } -+ } -+ } -+ } -+ -+ impl From<[$elem_ty; $elem_count]> for $id { -+ #[inline] -+ fn from(array: [$elem_ty; $elem_count]) -> Self { -+ unsafe { -+ // FIXME: unnecessary zeroing; better than UB. -+ let mut u: Self = crate::mem::zeroed(); -+ crate::ptr::copy_nonoverlapping( -+ &array as *const [$elem_ty; $elem_count] as *const u8, -+ &mut u as *mut Self as *mut u8, -+ crate::mem::size_of::() -+ ); -+ u -+ } -+ } -+ } -+ impl Into<[$elem_ty; $elem_count]> for $id { -+ #[inline] -+ fn into(self) -> [$elem_ty; $elem_count] { -+ unsafe { -+ // FIXME: unnecessary zeroing; better than UB. -+ let mut u: [$elem_ty; $elem_count] = crate::mem::zeroed(); -+ crate::ptr::copy_nonoverlapping( -+ &self as *const $id as *const u8, -+ &mut u as *mut [$elem_ty; $elem_count] as *mut u8, -+ crate::mem::size_of::() -+ ); -+ u -+ } -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _from>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn array() { -+ let values = [1_i32; $elem_count]; -+ -+ let mut vec: $id = Default::default(); -+ let mut array = [ -+ $id::::null().extract(0); $elem_count -+ ]; -+ -+ for i in 0..$elem_count { -+ let ptr = unsafe { -+ crate::mem::transmute( -+ &values[i] as *const i32 -+ ) -+ }; -+ vec = vec.replace(i, ptr); -+ array[i] = ptr; -+ } -+ -+ // FIXME: there is no impl of From<$id> for [$elem_ty; N] -+ // let a0 = From::from(vec); -+ // assert_eq!(a0, array); -+ #[allow(unused_assignments)] -+ let mut a1 = array; -+ a1 = vec.into(); -+ assert_eq!(a1, array); -+ -+ let v0: $id = From::from(array); -+ assert_eq!(v0, vec); -+ let v1: $id = array.into(); -+ assert_eq!(v1, vec); -+ } -+ } -+ } -+ } -+ -+ impl $id { -+ /// Instantiates a new vector with the values of the `slice`. -+ /// -+ /// # Panics -+ /// -+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned -+ /// to an `align_of::()` boundary. -+ #[inline] -+ pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self { -+ unsafe { -+ assert!(slice.len() >= $elem_count); -+ let target_ptr = slice.get_unchecked(0) as *const $elem_ty; -+ assert!( -+ target_ptr.align_offset(crate::mem::align_of::()) -+ == 0 -+ ); -+ Self::from_slice_aligned_unchecked(slice) -+ } -+ } -+ -+ /// Instantiates a new vector with the values of the `slice`. -+ /// -+ /// # Panics -+ /// -+ /// If `slice.len() < Self::lanes()`. -+ #[inline] -+ pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self { -+ unsafe { -+ assert!(slice.len() >= $elem_count); -+ Self::from_slice_unaligned_unchecked(slice) -+ } -+ } -+ -+ /// Instantiates a new vector with the values of the `slice`. -+ /// -+ /// # Precondition -+ /// -+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned -+ /// to an `align_of::()` boundary, the behavior is undefined. 
-+ #[inline] -+ pub unsafe fn from_slice_aligned_unchecked(slice: &[$elem_ty]) -+ -> Self { -+ #[allow(clippy::cast_ptr_alignment)] -+ *(slice.get_unchecked(0) as *const $elem_ty as *const Self) -+ } -+ -+ /// Instantiates a new vector with the values of the `slice`. -+ /// -+ /// # Precondition -+ /// -+ /// If `slice.len() < Self::lanes()` the behavior is undefined. -+ #[inline] -+ pub unsafe fn from_slice_unaligned_unchecked( -+ slice: &[$elem_ty], -+ ) -> Self { -+ use crate::mem::size_of; -+ let target_ptr = -+ slice.get_unchecked(0) as *const $elem_ty as *const u8; -+ let mut x = Self::splat(crate::ptr::null_mut() as $elem_ty); -+ let self_ptr = &mut x as *mut Self as *mut u8; -+ crate::ptr::copy_nonoverlapping( -+ target_ptr, -+ self_ptr, -+ size_of::(), -+ ); -+ x -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _slice_from_slice>] { -+ use super::*; -+ use crate::iter::Iterator; -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn from_slice_unaligned() { -+ let (null, non_null) = ptr_vals!($id); -+ -+ let mut unaligned = [ -+ non_null; $id::::lanes() + 1 -+ ]; -+ unaligned[0] = null; -+ let vec = $id::::from_slice_unaligned( -+ &unaligned[1..] -+ ); -+ for (index, &b) in unaligned.iter().enumerate() { -+ if index == 0 { -+ assert_eq!(b, null); -+ } else { -+ assert_eq!(b, non_null); -+ assert_eq!(b, vec.extract(index - 1)); -+ } -+ } -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn from_slice_unaligned_fail() { -+ let (_null, non_null) = ptr_vals!($id); -+ let unaligned = [non_null; $id::::lanes() + 1]; -+ // the slice is not large enough => panic -+ let _vec = $id::::from_slice_unaligned( -+ &unaligned[2..] 
-+ ); -+ } -+ -+ union A { -+ data: [<$id as sealed::Simd>::Element; -+ 2 * $id::::lanes()], -+ _vec: $id, -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn from_slice_aligned() { -+ let (null, non_null) = ptr_vals!($id); -+ let mut aligned = A { -+ data: [null; 2 * $id::::lanes()], -+ }; -+ for i in -+ $id::::lanes()..(2 * $id::::lanes()) { -+ unsafe { -+ aligned.data[i] = non_null; -+ } -+ } -+ -+ let vec = unsafe { -+ $id::::from_slice_aligned( -+ &aligned.data[$id::::lanes()..] -+ ) -+ }; -+ for (index, &b) in unsafe { -+ aligned.data.iter().enumerate() -+ } { -+ if index < $id::::lanes() { -+ assert_eq!(b, null); -+ } else { -+ assert_eq!(b, non_null); -+ assert_eq!( -+ b, vec.extract(index - $id::::lanes()) -+ ); -+ } -+ } -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn from_slice_aligned_fail_lanes() { -+ let (_null, non_null) = ptr_vals!($id); -+ let aligned = A { -+ data: [non_null; 2 * $id::::lanes()], -+ }; -+ // the slice is not large enough => panic -+ let _vec = unsafe { -+ $id::::from_slice_aligned( -+ &aligned.data[2 * $id::::lanes()..] 
-+ ) -+ }; -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn from_slice_aligned_fail_align() { -+ unsafe { -+ let (null, _non_null) = ptr_vals!($id); -+ let aligned = A { -+ data: [null; 2 * $id::::lanes()], -+ }; -+ -+ // get a pointer to the front of data -+ let ptr = aligned.data.as_ptr(); -+ // offset pointer by one element -+ let ptr = ptr.wrapping_add(1); -+ -+ if ptr.align_offset( -+ crate::mem::align_of::<$id>() -+ ) == 0 { -+ // the pointer is properly aligned, so -+ // from_slice_aligned won't fail here (e.g. this -+ // can happen for i128x1). So we panic to make -+ // the "should_fail" test pass: -+ panic!("ok"); -+ } -+ -+ // create a slice - this is safe, because the -+ // elements of the slice exist, are properly -+ // initialized, and properly aligned: -+ let s = slice::from_raw_parts( -+ ptr, $id::::lanes() -+ ); -+ // this should always panic because the slice -+ // alignment does not match the alignment -+ // requirements for the vector type: -+ let _vec = $id::::from_slice_aligned(s); -+ } -+ } -+ } -+ } -+ } -+ -+ impl $id { -+ /// Writes the values of the vector to the `slice`. -+ /// -+ /// # Panics -+ /// -+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not -+ /// aligned to an `align_of::()` boundary. -+ #[inline] -+ pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) { -+ unsafe { -+ assert!(slice.len() >= $elem_count); -+ let target_ptr = -+ slice.get_unchecked_mut(0) as *mut $elem_ty; -+ assert!( -+ target_ptr.align_offset(crate::mem::align_of::()) -+ == 0 -+ ); -+ self.write_to_slice_aligned_unchecked(slice); -+ } -+ } -+ -+ /// Writes the values of the vector to the `slice`. -+ /// -+ /// # Panics -+ /// -+ /// If `slice.len() < Self::lanes()`. 
-+ #[inline] -+ pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) { -+ unsafe { -+ assert!(slice.len() >= $elem_count); -+ self.write_to_slice_unaligned_unchecked(slice); -+ } -+ } -+ -+ /// Writes the values of the vector to the `slice`. -+ /// -+ /// # Precondition -+ /// -+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not -+ /// aligned to an `align_of::()` boundary, the behavior is -+ /// undefined. -+ #[inline] -+ pub unsafe fn write_to_slice_aligned_unchecked( -+ self, slice: &mut [$elem_ty], -+ ) { -+ #[allow(clippy::cast_ptr_alignment)] -+ *(slice.get_unchecked_mut(0) as *mut $elem_ty as *mut Self) = -+ self; -+ } -+ -+ /// Writes the values of the vector to the `slice`. -+ /// -+ /// # Precondition -+ /// -+ /// If `slice.len() < Self::lanes()` the behavior is undefined. -+ #[inline] -+ pub unsafe fn write_to_slice_unaligned_unchecked( -+ self, slice: &mut [$elem_ty], -+ ) { -+ let target_ptr = -+ slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; -+ let self_ptr = &self as *const Self as *const u8; -+ crate::ptr::copy_nonoverlapping( -+ self_ptr, -+ target_ptr, -+ crate::mem::size_of::(), -+ ); -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _slice_write_to_slice>] { -+ use super::*; -+ use crate::iter::Iterator; -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn write_to_slice_unaligned() { -+ let (null, non_null) = ptr_vals!($id); -+ let mut unaligned = [null; $id::::lanes() + 1]; -+ let vec = $id::::splat(non_null); -+ vec.write_to_slice_unaligned(&mut unaligned[1..]); -+ for (index, &b) in unaligned.iter().enumerate() { -+ if index == 0 { -+ assert_eq!(b, null); -+ } else { -+ assert_eq!(b, non_null); -+ assert_eq!(b, vec.extract(index - 1)); -+ } -+ } -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn write_to_slice_unaligned_fail() { -+ let (null, non_null) = ptr_vals!($id); -+ let mut unaligned = [null; $id::::lanes() + 1]; -+ let vec = $id::::splat(non_null); -+ // the slice is not large enough => panic -+ vec.write_to_slice_unaligned(&mut unaligned[2..]); -+ } -+ -+ union A { -+ data: [<$id as sealed::Simd>::Element; -+ 2 * $id::::lanes()], -+ _vec: $id, -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn write_to_slice_aligned() { -+ let (null, non_null) = ptr_vals!($id); -+ let mut aligned = A { -+ data: [null; 2 * $id::::lanes()], -+ }; -+ let vec = $id::::splat(non_null); -+ unsafe { -+ vec.write_to_slice_aligned( -+ &mut aligned.data[$id::::lanes()..] 
-+ ) -+ }; -+ for (index, &b) in -+ unsafe { aligned.data.iter().enumerate() } { -+ if index < $id::::lanes() { -+ assert_eq!(b, null); -+ } else { -+ assert_eq!(b, non_null); -+ assert_eq!( -+ b, vec.extract(index - $id::::lanes()) -+ ); -+ } -+ } -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn write_to_slice_aligned_fail_lanes() { -+ let (null, non_null) = ptr_vals!($id); -+ let mut aligned = A { -+ data: [null; 2 * $id::::lanes()], -+ }; -+ let vec = $id::::splat(non_null); -+ // the slice is not large enough => panic -+ unsafe { -+ vec.write_to_slice_aligned( -+ &mut aligned.data[2 * $id::::lanes()..] -+ ) -+ }; -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn write_to_slice_aligned_fail_align() { -+ let (null, non_null) = ptr_vals!($id); -+ unsafe { -+ let mut aligned = A { -+ data: [null; 2 * $id::::lanes()], -+ }; -+ -+ // get a pointer to the front of data -+ let ptr = aligned.data.as_mut_ptr(); -+ // offset pointer by one element -+ let ptr = ptr.wrapping_add(1); -+ -+ if ptr.align_offset( -+ crate::mem::align_of::<$id>() -+ ) == 0 { -+ // the pointer is properly aligned, so -+ // write_to_slice_aligned won't fail here (e.g. -+ // this can happen for i128x1). 
So we panic to -+ // make the "should_fail" test pass: -+ panic!("ok"); -+ } -+ -+ // create a slice - this is safe, because the -+ // elements of the slice exist, are properly -+ // initialized, and properly aligned: -+ let s = slice::from_raw_parts_mut( -+ ptr, $id::::lanes() -+ ); -+ // this should always panic because the slice -+ // alignment does not match the alignment -+ // requirements for the vector type: -+ let vec = $id::::splat(non_null); -+ vec.write_to_slice_aligned(s); -+ } -+ } -+ } -+ } -+ } -+ -+ impl crate::hash::Hash for $id { -+ #[inline] -+ fn hash(&self, state: &mut H) { -+ let s: $usize_ty = unsafe { crate::mem::transmute(*self) }; -+ s.hash(state) -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _hash>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn hash() { -+ use crate::hash::{Hash, Hasher}; -+ #[allow(deprecated)] -+ use crate::hash::{SipHasher13}; -+ -+ let values = [1_i32; $elem_count]; -+ -+ let mut vec: $id = Default::default(); -+ let mut array = [ -+ $id::::null().extract(0); -+ $elem_count -+ ]; -+ -+ for i in 0..$elem_count { -+ let ptr = unsafe { -+ crate::mem::transmute( -+ &values[i] as *const i32 -+ ) -+ }; -+ vec = vec.replace(i, ptr); -+ array[i] = ptr; -+ } -+ -+ #[allow(deprecated)] -+ let mut a_hash = SipHasher13::new(); -+ let mut v_hash = a_hash.clone(); -+ array.hash(&mut a_hash); -+ vec.hash(&mut v_hash); -+ assert_eq!(a_hash.finish(), v_hash.finish()); -+ } -+ } -+ } -+ } -+ -+ impl $id { -+ /// Calculates the offset from a pointer. -+ /// -+ /// `count` is in units of `T`; e.g. a count of `3` represents a -+ /// pointer offset of `3 * size_of::()` bytes. 
-+ /// -+ /// # Safety -+ /// -+ /// If any of the following conditions are violated, the result is -+ /// Undefined Behavior: -+ /// -+ /// * Both the starting and resulting pointer must be either in -+ /// bounds or one byte past the end of an allocated object. -+ /// -+ /// * The computed offset, in bytes, cannot overflow an `isize`. -+ /// -+ /// * The offset being in bounds cannot rely on "wrapping around" -+ /// the address space. That is, the infinite-precision sum, in bytes -+ /// must fit in a `usize`. -+ /// -+ /// The compiler and standard library generally tries to ensure -+ /// allocations never reach a size where an offset is a concern. For -+ /// instance, `Vec` and `Box` ensure they never allocate more than -+ /// `isize::MAX` bytes, so `vec.as_ptr().offset(vec.len() as isize)` -+ /// is always safe. -+ /// -+ /// Most platforms fundamentally can't even construct such an -+ /// allocation. For instance, no known 64-bit platform can ever -+ /// serve a request for 263 bytes due to page-table limitations or -+ /// splitting the address space. However, some 32-bit and 16-bit -+ /// platforms may successfully serve a request for more than -+ /// `isize::MAX` bytes with things like Physical Address Extension. -+ /// As such, memory acquired directly from allocators or memory -+ /// mapped files may be too large to handle with this function. -+ /// -+ /// Consider using `wrapping_offset` instead if these constraints -+ /// are difficult to satisfy. The only advantage of this method is -+ /// that it enables more aggressive compiler optimizations. -+ #[inline] -+ pub unsafe fn offset(self, count: $isize_ty) -> Self { -+ // FIXME: should use LLVM's `add nsw nuw` -+ self.wrapping_offset(count) -+ } -+ -+ /// Calculates the offset from a pointer using wrapping arithmetic. -+ /// -+ /// `count` is in units of `T`; e.g. a count of `3` represents a -+ /// pointer offset of `3 * size_of::()` bytes. 
-+ /// -+ /// # Safety -+ /// -+ /// The resulting pointer does not need to be in bounds, but it is -+ /// potentially hazardous to dereference (which requires unsafe). -+ /// -+ /// Always use `.offset(count)` instead when possible, because -+ /// offset allows the compiler to optimize better. -+ #[inline] -+ pub fn wrapping_offset(self, count: $isize_ty) -> Self { -+ unsafe { -+ let x: $isize_ty = crate::mem::transmute(self); -+ // note: {+,*} currently performs a `wrapping_{add, mul}` -+ crate::mem::transmute( -+ x + (count * crate::mem::size_of::() as isize) -+ ) -+ } -+ } -+ -+ /// Calculates the distance between two pointers. -+ /// -+ /// The returned value is in units of `T`: the distance in bytes is -+ /// divided by `mem::size_of::()`. -+ /// -+ /// This function is the inverse of offset. -+ /// -+ /// # Safety -+ /// -+ /// If any of the following conditions are violated, the result is -+ /// Undefined Behavior: -+ /// -+ /// * Both the starting and other pointer must be either in bounds -+ /// or one byte past the end of the same allocated object. -+ /// -+ /// * The distance between the pointers, in bytes, cannot overflow -+ /// an `isize`. -+ /// -+ /// * The distance between the pointers, in bytes, must be an exact -+ /// multiple of the size of `T`. -+ /// -+ /// * The distance being in bounds cannot rely on "wrapping around" -+ /// the address space. -+ /// -+ /// The compiler and standard library generally try to ensure -+ /// allocations never reach a size where an offset is a concern. For -+ /// instance, `Vec` and `Box` ensure they never allocate more than -+ /// `isize::MAX` bytes, so `ptr_into_vec.offset_from(vec.as_ptr())` -+ /// is always safe. -+ /// -+ /// Most platforms fundamentally can't even construct such an -+ /// allocation. For instance, no known 64-bit platform can ever -+ /// serve a request for 263 bytes due to page-table limitations or -+ /// splitting the address space. 
However, some 32-bit and 16-bit -+ /// platforms may successfully serve a request for more than -+ /// `isize::MAX` bytes with things like Physical Address Extension. -+ /// As such, memory acquired directly from allocators or memory -+ /// mapped files may be too large to handle with this function. -+ /// -+ /// Consider using wrapping_offset_from instead if these constraints -+ /// are difficult to satisfy. The only advantage of this method is -+ /// that it enables more aggressive compiler optimizations. -+ #[inline] -+ pub unsafe fn offset_from(self, origin: Self) -> $isize_ty { -+ // FIXME: should use LLVM's `sub nsw nuw`. -+ self.wrapping_offset_from(origin) -+ } -+ -+ /// Calculates the distance between two pointers. -+ /// -+ /// The returned value is in units of `T`: the distance in bytes is -+ /// divided by `mem::size_of::()`. -+ /// -+ /// If the address different between the two pointers is not a -+ /// multiple of `mem::size_of::()` then the result of the -+ /// division is rounded towards zero. -+ /// -+ /// Though this method is safe for any two pointers, note that its -+ /// result will be mostly useless if the two pointers aren't into -+ /// the same allocated object, for example if they point to two -+ /// different local variables. -+ #[inline] -+ pub fn wrapping_offset_from(self, origin: Self) -> $isize_ty { -+ let x: $isize_ty = unsafe { crate::mem::transmute(self) }; -+ let y: $isize_ty = unsafe { crate::mem::transmute(origin) }; -+ // note: {-,/} currently perform wrapping_{sub, div} -+ (y - x) / (crate::mem::size_of::() as isize) -+ } -+ -+ /// Calculates the offset from a pointer (convenience for -+ /// `.offset(count as isize)`). -+ /// -+ /// `count` is in units of `T`; e.g. a count of 3 represents a -+ /// pointer offset of `3 * size_of::()` bytes. 
-+ /// -+ /// # Safety -+ /// -+ /// If any of the following conditions are violated, the result is -+ /// Undefined Behavior: -+ /// -+ /// * Both the starting and resulting pointer must be either in -+ /// bounds or one byte past the end of an allocated object. -+ /// -+ /// * The computed offset, in bytes, cannot overflow an `isize`. -+ /// -+ /// * The offset being in bounds cannot rely on "wrapping around" -+ /// the address space. That is, the infinite-precision sum must fit -+ /// in a `usize`. -+ /// -+ /// The compiler and standard library generally tries to ensure -+ /// allocations never reach a size where an offset is a concern. For -+ /// instance, `Vec` and `Box` ensure they never allocate more than -+ /// `isize::MAX` bytes, so `vec.as_ptr().add(vec.len())` is always -+ /// safe. -+ /// -+ /// Most platforms fundamentally can't even construct such an -+ /// allocation. For instance, no known 64-bit platform can ever -+ /// serve a request for 263 bytes due to page-table limitations or -+ /// splitting the address space. However, some 32-bit and 16-bit -+ /// platforms may successfully serve a request for more than -+ /// `isize::MAX` bytes with things like Physical Address Extension. -+ /// As such, memory acquired directly from allocators or memory -+ /// mapped files may be too large to handle with this function. -+ /// -+ /// Consider using `wrapping_offset` instead if these constraints -+ /// are difficult to satisfy. The only advantage of this method is -+ /// that it enables more aggressive compiler optimizations. -+ #[inline] -+ #[allow(clippy::should_implement_trait)] -+ pub unsafe fn add(self, count: $usize_ty) -> Self { -+ self.offset(count.cast()) -+ } -+ -+ /// Calculates the offset from a pointer (convenience for -+ /// `.offset((count as isize).wrapping_neg())`). -+ /// -+ /// `count` is in units of T; e.g. a `count` of 3 represents a -+ /// pointer offset of `3 * size_of::()` bytes. 
-+ /// -+ /// # Safety -+ /// -+ /// If any of the following conditions are violated, the result is -+ /// Undefined Behavior: -+ /// -+ /// * Both the starting and resulting pointer must be either in -+ /// bounds or one byte past the end of an allocated object. -+ /// -+ /// * The computed offset cannot exceed `isize::MAX` **bytes**. -+ /// -+ /// * The offset being in bounds cannot rely on "wrapping around" -+ /// the address space. That is, the infinite-precision sum must fit -+ /// in a usize. -+ /// -+ /// The compiler and standard library generally tries to ensure -+ /// allocations never reach a size where an offset is a concern. For -+ /// instance, `Vec` and `Box` ensure they never allocate more than -+ /// `isize::MAX` bytes, so -+ /// `vec.as_ptr().add(vec.len()).sub(vec.len())` is always safe. -+ /// -+ /// Most platforms fundamentally can't even construct such an -+ /// allocation. For instance, no known 64-bit platform can ever -+ /// serve a request for 263 bytes due to page-table -+ /// limitations or splitting the address space. However, some 32-bit -+ /// and 16-bit platforms may successfully serve a request for more -+ /// than `isize::MAX` bytes with things like Physical Address -+ /// Extension. As such, memory acquired directly from allocators or -+ /// memory mapped files *may* be too large to handle with this -+ /// function. -+ /// -+ /// Consider using `wrapping_offset` instead if these constraints -+ /// are difficult to satisfy. The only advantage of this method is -+ /// that it enables more aggressive compiler optimizations. -+ #[inline] -+ #[allow(clippy::should_implement_trait)] -+ pub unsafe fn sub(self, count: $usize_ty) -> Self { -+ let x: $isize_ty = count.cast(); -+ // note: - is currently wrapping_neg -+ self.offset(-x) -+ } -+ -+ /// Calculates the offset from a pointer using wrapping arithmetic. -+ /// (convenience for `.wrapping_offset(count as isize)`) -+ /// -+ /// `count` is in units of T; e.g. 
a `count` of 3 represents a -+ /// pointer offset of `3 * size_of::()` bytes. -+ /// -+ /// # Safety -+ /// -+ /// The resulting pointer does not need to be in bounds, but it is -+ /// potentially hazardous to dereference (which requires `unsafe`). -+ /// -+ /// Always use `.add(count)` instead when possible, because `add` -+ /// allows the compiler to optimize better. -+ #[inline] -+ pub fn wrapping_add(self, count: $usize_ty) -> Self { -+ self.wrapping_offset(count.cast()) -+ } -+ -+ /// Calculates the offset from a pointer using wrapping arithmetic. -+ /// (convenience for `.wrapping_offset((count as -+ /// isize).wrapping_sub())`) -+ /// -+ /// `count` is in units of T; e.g. a `count` of 3 represents a -+ /// pointer offset of `3 * size_of::()` bytes. -+ /// -+ /// # Safety -+ /// -+ /// The resulting pointer does not need to be in bounds, but it is -+ /// potentially hazardous to dereference (which requires `unsafe`). -+ /// -+ /// Always use `.sub(count)` instead when possible, because `sub` -+ /// allows the compiler to optimize better. -+ #[inline] -+ pub fn wrapping_sub(self, count: $usize_ty) -> Self { -+ let x: $isize_ty = count.cast(); -+ self.wrapping_offset(-1 * x) -+ } -+ } -+ -+ impl $id { -+ /// Shuffle vector elements according to `indices`. -+ #[inline] -+ pub fn shuffle1_dyn(self, indices: I) -> Self -+ where -+ Self: codegen::shuffle1_dyn::Shuffle1Dyn, -+ { -+ codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices) -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _shuffle1_dyn>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn shuffle1_dyn() { -+ let (null, non_null) = ptr_vals!($id); -+ -+ // alternating = [non_null, null, non_null, null, ...] 
-+ let mut alternating = $id::::splat(null); -+ for i in 0..$id::::lanes() { -+ if i % 2 == 0 { -+ alternating = alternating.replace(i, non_null); -+ } -+ } -+ -+ type Indices = <$id -+ as codegen::shuffle1_dyn::Shuffle1Dyn>::Indices; -+ // even = [0, 0, 2, 2, 4, 4, ..] -+ let even = { -+ let mut v = Indices::splat(0); -+ for i in 0..$id::::lanes() { -+ if i % 2 == 0 { -+ v = v.replace(i, (i as u8).into()); -+ } else { -+ v = v.replace(i, (i as u8 - 1).into()); -+ } -+ } -+ v -+ }; -+ // odd = [1, 1, 3, 3, 5, 5, ...] -+ let odd = { -+ let mut v = Indices::splat(0); -+ for i in 0..$id::::lanes() { -+ if i % 2 != 0 { -+ v = v.replace(i, (i as u8).into()); -+ } else { -+ v = v.replace(i, (i as u8 + 1).into()); -+ } -+ } -+ v -+ }; -+ -+ assert_eq!( -+ alternating.shuffle1_dyn(even), -+ $id::::splat(non_null) -+ ); -+ if $id::::lanes() > 1 { -+ assert_eq!( -+ alternating.shuffle1_dyn(odd), -+ $id::::splat(null) -+ ); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/ops.rs b/third_party/rust/packed_simd/src/api/ops.rs -new file mode 100644 -index 000000000000..f71c98795da3 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ops.rs -@@ -0,0 +1,32 @@ -+//! 
Implementation of the `ops` traits -+#[macro_use] -+mod vector_mask_bitwise; -+#[macro_use] -+mod scalar_mask_bitwise; -+ -+#[macro_use] -+mod vector_arithmetic; -+#[macro_use] -+mod scalar_arithmetic; -+ -+#[macro_use] -+mod vector_bitwise; -+#[macro_use] -+mod scalar_bitwise; -+ -+#[macro_use] -+mod vector_shifts; -+#[macro_use] -+mod scalar_shifts; -+ -+#[macro_use] -+mod vector_rotates; -+ -+#[macro_use] -+mod vector_neg; -+ -+#[macro_use] -+mod vector_int_min_max; -+ -+#[macro_use] -+mod vector_float_min_max; -diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs -new file mode 100644 -index 000000000000..da1a2037eaaf ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs -@@ -0,0 +1,203 @@ -+//! Vertical (lane-wise) vector-scalar / scalar-vector arithmetic operations. -+ -+macro_rules! impl_ops_scalar_arithmetic { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl crate::ops::Add<$elem_ty> for $id { -+ type Output = Self; -+ #[inline] -+ fn add(self, other: $elem_ty) -> Self { -+ self + $id::splat(other) -+ } -+ } -+ impl crate::ops::Add<$id> for $elem_ty { -+ type Output = $id; -+ #[inline] -+ fn add(self, other: $id) -> $id { -+ $id::splat(self) + other -+ } -+ } -+ -+ impl crate::ops::Sub<$elem_ty> for $id { -+ type Output = Self; -+ #[inline] -+ fn sub(self, other: $elem_ty) -> Self { -+ self - $id::splat(other) -+ } -+ } -+ impl crate::ops::Sub<$id> for $elem_ty { -+ type Output = $id; -+ #[inline] -+ fn sub(self, other: $id) -> $id { -+ $id::splat(self) - other -+ } -+ } -+ -+ impl crate::ops::Mul<$elem_ty> for $id { -+ type Output = Self; -+ #[inline] -+ fn mul(self, other: $elem_ty) -> Self { -+ self * $id::splat(other) -+ } -+ } -+ impl crate::ops::Mul<$id> for $elem_ty { -+ type Output = $id; -+ #[inline] -+ fn mul(self, other: $id) -> $id { -+ $id::splat(self) * other -+ } -+ } -+ -+ impl 
crate::ops::Div<$elem_ty> for $id { -+ type Output = Self; -+ #[inline] -+ fn div(self, other: $elem_ty) -> Self { -+ self / $id::splat(other) -+ } -+ } -+ impl crate::ops::Div<$id> for $elem_ty { -+ type Output = $id; -+ #[inline] -+ fn div(self, other: $id) -> $id { -+ $id::splat(self) / other -+ } -+ } -+ -+ impl crate::ops::Rem<$elem_ty> for $id { -+ type Output = Self; -+ #[inline] -+ fn rem(self, other: $elem_ty) -> Self { -+ self % $id::splat(other) -+ } -+ } -+ impl crate::ops::Rem<$id> for $elem_ty { -+ type Output = $id; -+ #[inline] -+ fn rem(self, other: $id) -> $id { -+ $id::splat(self) % other -+ } -+ } -+ -+ impl crate::ops::AddAssign<$elem_ty> for $id { -+ #[inline] -+ fn add_assign(&mut self, other: $elem_ty) { -+ *self = *self + other; -+ } -+ } -+ -+ impl crate::ops::SubAssign<$elem_ty> for $id { -+ #[inline] -+ fn sub_assign(&mut self, other: $elem_ty) { -+ *self = *self - other; -+ } -+ } -+ -+ impl crate::ops::MulAssign<$elem_ty> for $id { -+ #[inline] -+ fn mul_assign(&mut self, other: $elem_ty) { -+ *self = *self * other; -+ } -+ } -+ -+ impl crate::ops::DivAssign<$elem_ty> for $id { -+ #[inline] -+ fn div_assign(&mut self, other: $elem_ty) { -+ *self = *self / other; -+ } -+ } -+ -+ impl crate::ops::RemAssign<$elem_ty> for $id { -+ #[inline] -+ fn rem_assign(&mut self, other: $elem_ty) { -+ *self = *self % other; -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _ops_scalar_arith>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn ops_scalar_arithmetic() { -+ let zi = 0 as $elem_ty; -+ let oi = 1 as $elem_ty; -+ let ti = 2 as $elem_ty; -+ let fi = 4 as $elem_ty; -+ let z = $id::splat(zi); -+ let o = $id::splat(oi); -+ let t = $id::splat(ti); -+ let f = $id::splat(fi); -+ -+ // add -+ assert_eq!(zi + z, z); -+ assert_eq!(z + zi, z); -+ assert_eq!(oi + z, o); -+ assert_eq!(o + zi, o); -+ assert_eq!(ti + z, t); -+ assert_eq!(t + zi, t); -+ assert_eq!(ti + t, f); -+ assert_eq!(t + ti, f); -+ // sub -+ assert_eq!(zi - z, z); -+ assert_eq!(z - zi, z); -+ assert_eq!(oi - z, o); -+ assert_eq!(o - zi, o); -+ assert_eq!(ti - z, t); -+ assert_eq!(t - zi, t); -+ assert_eq!(fi - t, t); -+ assert_eq!(f - ti, t); -+ assert_eq!(f - o - o, t); -+ assert_eq!(f - oi - oi, t); -+ // mul -+ assert_eq!(zi * z, z); -+ assert_eq!(z * zi, z); -+ assert_eq!(zi * o, z); -+ assert_eq!(z * oi, z); -+ assert_eq!(zi * t, z); -+ assert_eq!(z * ti, z); -+ assert_eq!(oi * t, t); -+ assert_eq!(o * ti, t); -+ assert_eq!(ti * t, f); -+ assert_eq!(t * ti, f); -+ // div -+ assert_eq!(zi / o, z); -+ assert_eq!(z / oi, z); -+ assert_eq!(ti / o, t); -+ assert_eq!(t / oi, t); -+ assert_eq!(fi / o, f); -+ assert_eq!(f / oi, f); -+ assert_eq!(ti / t, o); -+ assert_eq!(t / ti, o); -+ assert_eq!(fi / t, t); -+ assert_eq!(f / ti, t); -+ // rem -+ assert_eq!(oi % o, z); -+ assert_eq!(o % oi, z); -+ assert_eq!(fi % t, z); -+ assert_eq!(f % ti, z); -+ -+ { -+ let mut v = z; -+ assert_eq!(v, z); -+ v += oi; // add_assign -+ assert_eq!(v, o); -+ v -= oi; // sub_assign -+ assert_eq!(v, z); -+ v = t; -+ v *= oi; // mul_assign -+ assert_eq!(v, t); -+ v *= ti; -+ assert_eq!(v, f); -+ v /= oi; // div_assign -+ assert_eq!(v, f); -+ v /= ti; -+ assert_eq!(v, t); -+ v %= ti; // rem_assign -+ assert_eq!(v, z); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git 
a/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs -new file mode 100644 -index 000000000000..88216769aec4 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs -@@ -0,0 +1,162 @@ -+//! Vertical (lane-wise) vector-scalar / scalar-vector bitwise operations. -+ -+macro_rules! impl_ops_scalar_bitwise { -+ ( -+ [$elem_ty:ident; $elem_count:expr]: -+ $id:ident | $test_tt:tt | -+ ($true:expr, $false:expr) -+ ) => { -+ impl crate::ops::BitXor<$elem_ty> for $id { -+ type Output = Self; -+ #[inline] -+ fn bitxor(self, other: $elem_ty) -> Self { -+ self ^ $id::splat(other) -+ } -+ } -+ impl crate::ops::BitXor<$id> for $elem_ty { -+ type Output = $id; -+ #[inline] -+ fn bitxor(self, other: $id) -> $id { -+ $id::splat(self) ^ other -+ } -+ } -+ -+ impl crate::ops::BitAnd<$elem_ty> for $id { -+ type Output = Self; -+ #[inline] -+ fn bitand(self, other: $elem_ty) -> Self { -+ self & $id::splat(other) -+ } -+ } -+ impl crate::ops::BitAnd<$id> for $elem_ty { -+ type Output = $id; -+ #[inline] -+ fn bitand(self, other: $id) -> $id { -+ $id::splat(self) & other -+ } -+ } -+ -+ impl crate::ops::BitOr<$elem_ty> for $id { -+ type Output = Self; -+ #[inline] -+ fn bitor(self, other: $elem_ty) -> Self { -+ self | $id::splat(other) -+ } -+ } -+ impl crate::ops::BitOr<$id> for $elem_ty { -+ type Output = $id; -+ #[inline] -+ fn bitor(self, other: $id) -> $id { -+ $id::splat(self) | other -+ } -+ } -+ -+ impl crate::ops::BitAndAssign<$elem_ty> for $id { -+ #[inline] -+ fn bitand_assign(&mut self, other: $elem_ty) { -+ *self = *self & other; -+ } -+ } -+ impl crate::ops::BitOrAssign<$elem_ty> for $id { -+ #[inline] -+ fn bitor_assign(&mut self, other: $elem_ty) { -+ *self = *self | other; -+ } -+ } -+ impl crate::ops::BitXorAssign<$elem_ty> for $id { -+ #[inline] -+ fn bitxor_assign(&mut self, other: $elem_ty) { -+ *self = *self ^ other; -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _ops_scalar_bitwise>] { -+ use super::*; -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn ops_scalar_bitwise() { -+ let zi = 0 as $elem_ty; -+ let oi = 1 as $elem_ty; -+ let ti = 2 as $elem_ty; -+ let z = $id::splat(zi); -+ let o = $id::splat(oi); -+ let t = $id::splat(ti); -+ -+ // BitAnd: -+ assert_eq!(oi & o, o); -+ assert_eq!(o & oi, o); -+ assert_eq!(oi & z, z); -+ assert_eq!(o & zi, z); -+ assert_eq!(zi & o, z); -+ assert_eq!(z & oi, z); -+ assert_eq!(zi & z, z); -+ assert_eq!(z & zi, z); -+ -+ assert_eq!(ti & t, t); -+ assert_eq!(t & ti, t); -+ assert_eq!(ti & o, z); -+ assert_eq!(t & oi, z); -+ assert_eq!(oi & t, z); -+ assert_eq!(o & ti, z); -+ -+ // BitOr: -+ assert_eq!(oi | o, o); -+ assert_eq!(o | oi, o); -+ assert_eq!(oi | z, o); -+ assert_eq!(o | zi, o); -+ assert_eq!(zi | o, o); -+ assert_eq!(z | oi, o); -+ assert_eq!(zi | z, z); -+ assert_eq!(z | zi, z); -+ -+ assert_eq!(ti | t, t); -+ assert_eq!(t | ti, t); -+ assert_eq!(zi | t, t); -+ assert_eq!(z | ti, t); -+ assert_eq!(ti | z, t); -+ assert_eq!(t | zi, t); -+ -+ // BitXOR: -+ assert_eq!(oi ^ o, z); -+ assert_eq!(o ^ oi, z); -+ assert_eq!(zi ^ z, z); -+ assert_eq!(z ^ zi, z); -+ assert_eq!(zi ^ o, o); -+ assert_eq!(z ^ oi, o); -+ assert_eq!(oi ^ z, o); -+ assert_eq!(o ^ zi, o); -+ -+ assert_eq!(ti ^ t, z); -+ assert_eq!(t ^ ti, z); -+ assert_eq!(ti ^ z, t); -+ assert_eq!(t ^ zi, t); -+ assert_eq!(zi ^ t, t); -+ assert_eq!(z ^ ti, t); -+ -+ { -+ // AndAssign: -+ let mut v = o; -+ v &= ti; -+ assert_eq!(v, z); -+ } -+ { -+ // OrAssign: -+ let mut v = z; -+ v |= oi; -+ assert_eq!(v, o); -+ } -+ { -+ // XORAssign: -+ let mut v = z; -+ v ^= oi; -+ assert_eq!(v, o); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs -new file mode 100644 -index 000000000000..523a85207b6b ---- /dev/null -+++ 
b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs -@@ -0,0 +1,140 @@ -+//! Vertical (lane-wise) vector-vector bitwise operations. -+ -+macro_rules! impl_ops_scalar_mask_bitwise { -+ ( -+ [$elem_ty:ident; $elem_count:expr]: -+ $id:ident | $test_tt:tt | -+ ($true:expr, $false:expr) -+ ) => { -+ impl crate::ops::BitXor for $id { -+ type Output = Self; -+ #[inline] -+ fn bitxor(self, other: bool) -> Self { -+ self ^ $id::splat(other) -+ } -+ } -+ impl crate::ops::BitXor<$id> for bool { -+ type Output = $id; -+ #[inline] -+ fn bitxor(self, other: $id) -> $id { -+ $id::splat(self) ^ other -+ } -+ } -+ -+ impl crate::ops::BitAnd for $id { -+ type Output = Self; -+ #[inline] -+ fn bitand(self, other: bool) -> Self { -+ self & $id::splat(other) -+ } -+ } -+ impl crate::ops::BitAnd<$id> for bool { -+ type Output = $id; -+ #[inline] -+ fn bitand(self, other: $id) -> $id { -+ $id::splat(self) & other -+ } -+ } -+ -+ impl crate::ops::BitOr for $id { -+ type Output = Self; -+ #[inline] -+ fn bitor(self, other: bool) -> Self { -+ self | $id::splat(other) -+ } -+ } -+ impl crate::ops::BitOr<$id> for bool { -+ type Output = $id; -+ #[inline] -+ fn bitor(self, other: $id) -> $id { -+ $id::splat(self) | other -+ } -+ } -+ -+ impl crate::ops::BitAndAssign for $id { -+ #[inline] -+ fn bitand_assign(&mut self, other: bool) { -+ *self = *self & other; -+ } -+ } -+ impl crate::ops::BitOrAssign for $id { -+ #[inline] -+ fn bitor_assign(&mut self, other: bool) { -+ *self = *self | other; -+ } -+ } -+ impl crate::ops::BitXorAssign for $id { -+ #[inline] -+ fn bitxor_assign(&mut self, other: bool) { -+ *self = *self ^ other; -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _ops_scalar_mask_bitwise>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn ops_scalar_mask_bitwise() { -+ let ti = true; -+ let fi = false; -+ let t = $id::splat(ti); -+ let f = $id::splat(fi); -+ assert!(t != f); -+ assert!(!(t == f)); -+ -+ // BitAnd: -+ assert_eq!(ti & f, f); -+ assert_eq!(t & fi, f); -+ assert_eq!(fi & t, f); -+ assert_eq!(f & ti, f); -+ assert_eq!(ti & t, t); -+ assert_eq!(t & ti, t); -+ assert_eq!(fi & f, f); -+ assert_eq!(f & fi, f); -+ -+ // BitOr: -+ assert_eq!(ti | f, t); -+ assert_eq!(t | fi, t); -+ assert_eq!(fi | t, t); -+ assert_eq!(f | ti, t); -+ assert_eq!(ti | t, t); -+ assert_eq!(t | ti, t); -+ assert_eq!(fi | f, f); -+ assert_eq!(f | fi, f); -+ -+ // BitXOR: -+ assert_eq!(ti ^ f, t); -+ assert_eq!(t ^ fi, t); -+ assert_eq!(fi ^ t, t); -+ assert_eq!(f ^ ti, t); -+ assert_eq!(ti ^ t, f); -+ assert_eq!(t ^ ti, f); -+ assert_eq!(fi ^ f, f); -+ assert_eq!(f ^ fi, f); -+ -+ { -+ // AndAssign: -+ let mut v = f; -+ v &= ti; -+ assert_eq!(v, f); -+ } -+ { -+ // OrAssign: -+ let mut v = f; -+ v |= ti; -+ assert_eq!(v, t); -+ } -+ { -+ // XORAssign: -+ let mut v = f; -+ v ^= ti; -+ assert_eq!(v, t); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs -new file mode 100644 -index 000000000000..9c164ad56c0b ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs -@@ -0,0 +1,107 @@ -+//! Vertical (lane-wise) vector-scalar shifts operations. -+ -+macro_rules! 
impl_ops_scalar_shifts { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl crate::ops::Shl for $id { -+ type Output = Self; -+ #[inline] -+ fn shl(self, other: u32) -> Self { -+ self << $id::splat(other as $elem_ty) -+ } -+ } -+ impl crate::ops::Shr for $id { -+ type Output = Self; -+ #[inline] -+ fn shr(self, other: u32) -> Self { -+ self >> $id::splat(other as $elem_ty) -+ } -+ } -+ -+ impl crate::ops::ShlAssign for $id { -+ #[inline] -+ fn shl_assign(&mut self, other: u32) { -+ *self = *self << other; -+ } -+ } -+ impl crate::ops::ShrAssign for $id { -+ #[inline] -+ fn shr_assign(&mut self, other: u32) { -+ *self = *self >> other; -+ } -+ } -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _ops_scalar_shifts>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"), -+ allow(unreachable_code, -+ unused_variables, -+ unused_mut) -+ )] -+ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 -+ fn ops_scalar_shifts() { -+ let z = $id::splat(0 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ let t = $id::splat(2 as $elem_ty); -+ let f = $id::splat(4 as $elem_ty); -+ -+ { -+ let zi = 0 as u32; -+ let oi = 1 as u32; -+ let ti = 2 as u32; -+ let maxi -+ = (mem::size_of::<$elem_ty>() * 8 - 1) as u32; -+ -+ // shr -+ assert_eq!(z >> zi, z); -+ assert_eq!(z >> oi, z); -+ assert_eq!(z >> ti, z); -+ assert_eq!(z >> ti, z); -+ -+ #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] { -+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/13 -+ return; -+ } -+ -+ assert_eq!(o >> zi, o); -+ assert_eq!(t >> zi, t); -+ assert_eq!(f >> zi, f); -+ assert_eq!(f >> maxi, z); -+ -+ assert_eq!(o >> oi, z); -+ assert_eq!(t >> oi, o); -+ assert_eq!(t >> ti, z); -+ assert_eq!(f >> oi, t); -+ assert_eq!(f >> ti, o); -+ assert_eq!(f >> maxi, z); -+ -+ // shl -+ assert_eq!(z << zi, z); 
-+ assert_eq!(o << zi, o); -+ assert_eq!(t << zi, t); -+ assert_eq!(f << zi, f); -+ assert_eq!(f << maxi, z); -+ -+ assert_eq!(o << oi, t); -+ assert_eq!(o << ti, f); -+ assert_eq!(t << oi, f); -+ -+ { // shr_assign -+ let mut v = o; -+ v >>= oi; -+ assert_eq!(v, z); -+ } -+ { // shl_assign -+ let mut v = o; -+ v <<= oi; -+ assert_eq!(v, t); -+ } -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs -new file mode 100644 -index 000000000000..7057f52d0317 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs -@@ -0,0 +1,148 @@ -+//! Vertical (lane-wise) vector-vector arithmetic operations. -+ -+macro_rules! impl_ops_vector_arithmetic { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl crate::ops::Add for $id { -+ type Output = Self; -+ #[inline] -+ fn add(self, other: Self) -> Self { -+ use crate::llvm::simd_add; -+ unsafe { Simd(simd_add(self.0, other.0)) } -+ } -+ } -+ -+ impl crate::ops::Sub for $id { -+ type Output = Self; -+ #[inline] -+ fn sub(self, other: Self) -> Self { -+ use crate::llvm::simd_sub; -+ unsafe { Simd(simd_sub(self.0, other.0)) } -+ } -+ } -+ -+ impl crate::ops::Mul for $id { -+ type Output = Self; -+ #[inline] -+ fn mul(self, other: Self) -> Self { -+ use crate::llvm::simd_mul; -+ unsafe { Simd(simd_mul(self.0, other.0)) } -+ } -+ } -+ -+ impl crate::ops::Div for $id { -+ type Output = Self; -+ #[inline] -+ fn div(self, other: Self) -> Self { -+ use crate::llvm::simd_div; -+ unsafe { Simd(simd_div(self.0, other.0)) } -+ } -+ } -+ -+ impl crate::ops::Rem for $id { -+ type Output = Self; -+ #[inline] -+ fn rem(self, other: Self) -> Self { -+ use crate::llvm::simd_rem; -+ unsafe { Simd(simd_rem(self.0, other.0)) } -+ } -+ } -+ -+ impl crate::ops::AddAssign for $id { -+ #[inline] -+ fn add_assign(&mut self, other: Self) { -+ *self = *self + other; -+ } -+ } -+ -+ impl 
crate::ops::SubAssign for $id { -+ #[inline] -+ fn sub_assign(&mut self, other: Self) { -+ *self = *self - other; -+ } -+ } -+ -+ impl crate::ops::MulAssign for $id { -+ #[inline] -+ fn mul_assign(&mut self, other: Self) { -+ *self = *self * other; -+ } -+ } -+ -+ impl crate::ops::DivAssign for $id { -+ #[inline] -+ fn div_assign(&mut self, other: Self) { -+ *self = *self / other; -+ } -+ } -+ -+ impl crate::ops::RemAssign for $id { -+ #[inline] -+ fn rem_assign(&mut self, other: Self) { -+ *self = *self % other; -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _ops_vector_arith>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn ops_vector_arithmetic() { -+ let z = $id::splat(0 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ let t = $id::splat(2 as $elem_ty); -+ let f = $id::splat(4 as $elem_ty); -+ -+ // add -+ assert_eq!(z + z, z); -+ assert_eq!(o + z, o); -+ assert_eq!(t + z, t); -+ assert_eq!(t + t, f); -+ // sub -+ assert_eq!(z - z, z); -+ assert_eq!(o - z, o); -+ assert_eq!(t - z, t); -+ assert_eq!(f - t, t); -+ assert_eq!(f - o - o, t); -+ // mul -+ assert_eq!(z * z, z); -+ assert_eq!(z * o, z); -+ assert_eq!(z * t, z); -+ assert_eq!(o * t, t); -+ assert_eq!(t * t, f); -+ // div -+ assert_eq!(z / o, z); -+ assert_eq!(t / o, t); -+ assert_eq!(f / o, f); -+ assert_eq!(t / t, o); -+ assert_eq!(f / t, t); -+ // rem -+ assert_eq!(o % o, z); -+ assert_eq!(f % t, z); -+ -+ { -+ let mut v = z; -+ assert_eq!(v, z); -+ v += o; // add_assign -+ assert_eq!(v, o); -+ v -= o; // sub_assign -+ assert_eq!(v, z); -+ v = t; -+ v *= o; // mul_assign -+ assert_eq!(v, t); -+ v *= t; -+ assert_eq!(v, f); -+ v /= o; // div_assign -+ assert_eq!(v, f); -+ v /= t; -+ assert_eq!(v, t); -+ v %= t; // rem_assign -+ assert_eq!(v, z); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs 
b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs -new file mode 100644 -index 000000000000..7be9603fa261 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs -@@ -0,0 +1,129 @@ -+//! Vertical (lane-wise) vector-vector bitwise operations. -+ -+macro_rules! impl_ops_vector_bitwise { -+ ( -+ [$elem_ty:ident; $elem_count:expr]: -+ $id:ident | $test_tt:tt | -+ ($true:expr, $false:expr) -+ ) => { -+ impl crate::ops::Not for $id { -+ type Output = Self; -+ #[inline] -+ fn not(self) -> Self { -+ Self::splat($true) ^ self -+ } -+ } -+ impl crate::ops::BitXor for $id { -+ type Output = Self; -+ #[inline] -+ fn bitxor(self, other: Self) -> Self { -+ use crate::llvm::simd_xor; -+ unsafe { Simd(simd_xor(self.0, other.0)) } -+ } -+ } -+ impl crate::ops::BitAnd for $id { -+ type Output = Self; -+ #[inline] -+ fn bitand(self, other: Self) -> Self { -+ use crate::llvm::simd_and; -+ unsafe { Simd(simd_and(self.0, other.0)) } -+ } -+ } -+ impl crate::ops::BitOr for $id { -+ type Output = Self; -+ #[inline] -+ fn bitor(self, other: Self) -> Self { -+ use crate::llvm::simd_or; -+ unsafe { Simd(simd_or(self.0, other.0)) } -+ } -+ } -+ impl crate::ops::BitAndAssign for $id { -+ #[inline] -+ fn bitand_assign(&mut self, other: Self) { -+ *self = *self & other; -+ } -+ } -+ impl crate::ops::BitOrAssign for $id { -+ #[inline] -+ fn bitor_assign(&mut self, other: Self) { -+ *self = *self | other; -+ } -+ } -+ impl crate::ops::BitXorAssign for $id { -+ #[inline] -+ fn bitxor_assign(&mut self, other: Self) { -+ *self = *self ^ other; -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _ops_vector_bitwise>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn ops_vector_bitwise() { -+ -+ let z = $id::splat(0 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ let t = $id::splat(2 as $elem_ty); -+ let m = $id::splat(!z.extract(0)); -+ -+ // Not: -+ assert_eq!(!z, m); -+ assert_eq!(!m, z); -+ -+ // BitAnd: -+ assert_eq!(o & o, o); -+ assert_eq!(o & z, z); -+ assert_eq!(z & o, z); -+ assert_eq!(z & z, z); -+ -+ assert_eq!(t & t, t); -+ assert_eq!(t & o, z); -+ assert_eq!(o & t, z); -+ -+ // BitOr: -+ assert_eq!(o | o, o); -+ assert_eq!(o | z, o); -+ assert_eq!(z | o, o); -+ assert_eq!(z | z, z); -+ -+ assert_eq!(t | t, t); -+ assert_eq!(z | t, t); -+ assert_eq!(t | z, t); -+ -+ // BitXOR: -+ assert_eq!(o ^ o, z); -+ assert_eq!(z ^ z, z); -+ assert_eq!(z ^ o, o); -+ assert_eq!(o ^ z, o); -+ -+ assert_eq!(t ^ t, z); -+ assert_eq!(t ^ z, t); -+ assert_eq!(z ^ t, t); -+ -+ { -+ // AndAssign: -+ let mut v = o; -+ v &= t; -+ assert_eq!(v, z); -+ } -+ { -+ // OrAssign: -+ let mut v = z; -+ v |= o; -+ assert_eq!(v, o); -+ } -+ { -+ // XORAssign: -+ let mut v = z; -+ v ^= o; -+ assert_eq!(v, o); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs -new file mode 100644 -index 000000000000..4126e87042f5 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs -@@ -0,0 +1,69 @@ -+//! Vertical (lane-wise) vector `min` and `max` for floating-point vectors. -+ -+macro_rules! impl_ops_vector_float_min_max { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Minimum of two vectors. -+ /// -+ /// Returns a new vector containing the minimum value of each of -+ /// the input vector lanes. 
-+ #[inline] -+ pub fn min(self, x: Self) -> Self { -+ use crate::llvm::simd_fmin; -+ unsafe { Simd(simd_fmin(self.0, x.0)) } -+ } -+ -+ /// Maximum of two vectors. -+ /// -+ /// Returns a new vector containing the maximum value of each of -+ /// the input vector lanes. -+ #[inline] -+ pub fn max(self, x: Self) -> Self { -+ use crate::llvm::simd_fmax; -+ unsafe { Simd(simd_fmax(self.0, x.0)) } -+ } -+ } -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _ops_vector_min_max>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn min_max() { -+ let n = crate::$elem_ty::NAN; -+ let o = $id::splat(1. as $elem_ty); -+ let t = $id::splat(2. as $elem_ty); -+ -+ let mut m = o; // [1., 2., 1., 2., ...] -+ let mut on = o; -+ for i in 0..$id::lanes() { -+ if i % 2 == 0 { -+ m = m.replace(i, 2. as $elem_ty); -+ on = on.replace(i, n); -+ } -+ } -+ -+ assert_eq!(o.min(t), o); -+ assert_eq!(t.min(o), o); -+ assert_eq!(m.min(o), o); -+ assert_eq!(o.min(m), o); -+ assert_eq!(m.min(t), m); -+ assert_eq!(t.min(m), m); -+ -+ assert_eq!(o.max(t), t); -+ assert_eq!(t.max(o), t); -+ assert_eq!(m.max(o), m); -+ assert_eq!(o.max(m), m); -+ assert_eq!(m.max(t), t); -+ assert_eq!(t.max(m), t); -+ -+ assert_eq!(on.min(o), o); -+ assert_eq!(o.min(on), o); -+ assert_eq!(on.max(o), o); -+ assert_eq!(o.max(on), o); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs -new file mode 100644 -index 000000000000..36ea98e6bf32 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs -@@ -0,0 +1,57 @@ -+//! Vertical (lane-wise) vector `min` and `max` for integer vectors. -+ -+macro_rules! impl_ops_vector_int_min_max { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Minimum of two vectors. 
-+ /// -+ /// Returns a new vector containing the minimum value of each of -+ /// the input vector lanes. -+ #[inline] -+ pub fn min(self, x: Self) -> Self { -+ self.lt(x).select(self, x) -+ } -+ -+ /// Maximum of two vectors. -+ /// -+ /// Returns a new vector containing the maximum value of each of -+ /// the input vector lanes. -+ #[inline] -+ pub fn max(self, x: Self) -> Self { -+ self.gt(x).select(self, x) -+ } -+ } -+ test_if!{$test_tt: -+ paste::item! { -+ pub mod [<$id _ops_vector_min_max>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn min_max() { -+ let o = $id::splat(1 as $elem_ty); -+ let t = $id::splat(2 as $elem_ty); -+ -+ let mut m = o; -+ for i in 0..$id::lanes() { -+ if i % 2 == 0 { -+ m = m.replace(i, 2 as $elem_ty); -+ } -+ } -+ assert_eq!(o.min(t), o); -+ assert_eq!(t.min(o), o); -+ assert_eq!(m.min(o), o); -+ assert_eq!(o.min(m), o); -+ assert_eq!(m.min(t), m); -+ assert_eq!(t.min(m), m); -+ -+ assert_eq!(o.max(t), t); -+ assert_eq!(t.max(o), t); -+ assert_eq!(m.max(o), m); -+ assert_eq!(o.max(m), m); -+ assert_eq!(m.max(t), t); -+ assert_eq!(t.max(m), t); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs -new file mode 100644 -index 000000000000..295fc1ca81c9 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs -@@ -0,0 +1,116 @@ -+//! Vertical (lane-wise) vector-vector bitwise operations. -+ -+macro_rules! 
impl_ops_vector_mask_bitwise { -+ ( -+ [$elem_ty:ident; $elem_count:expr]: -+ $id:ident | $test_tt:tt | -+ ($true:expr, $false:expr) -+ ) => { -+ impl crate::ops::Not for $id { -+ type Output = Self; -+ #[inline] -+ fn not(self) -> Self { -+ Self::splat($true) ^ self -+ } -+ } -+ impl crate::ops::BitXor for $id { -+ type Output = Self; -+ #[inline] -+ fn bitxor(self, other: Self) -> Self { -+ use crate::llvm::simd_xor; -+ unsafe { Simd(simd_xor(self.0, other.0)) } -+ } -+ } -+ impl crate::ops::BitAnd for $id { -+ type Output = Self; -+ #[inline] -+ fn bitand(self, other: Self) -> Self { -+ use crate::llvm::simd_and; -+ unsafe { Simd(simd_and(self.0, other.0)) } -+ } -+ } -+ impl crate::ops::BitOr for $id { -+ type Output = Self; -+ #[inline] -+ fn bitor(self, other: Self) -> Self { -+ use crate::llvm::simd_or; -+ unsafe { Simd(simd_or(self.0, other.0)) } -+ } -+ } -+ impl crate::ops::BitAndAssign for $id { -+ #[inline] -+ fn bitand_assign(&mut self, other: Self) { -+ *self = *self & other; -+ } -+ } -+ impl crate::ops::BitOrAssign for $id { -+ #[inline] -+ fn bitor_assign(&mut self, other: Self) { -+ *self = *self | other; -+ } -+ } -+ impl crate::ops::BitXorAssign for $id { -+ #[inline] -+ fn bitxor_assign(&mut self, other: Self) { -+ *self = *self ^ other; -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _ops_vector_mask_bitwise>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn ops_vector_mask_bitwise() { -+ let t = $id::splat(true); -+ let f = $id::splat(false); -+ assert!(t != f); -+ assert!(!(t == f)); -+ -+ // Not: -+ assert_eq!(!t, f); -+ assert_eq!(t, !f); -+ -+ // BitAnd: -+ assert_eq!(t & f, f); -+ assert_eq!(f & t, f); -+ assert_eq!(t & t, t); -+ assert_eq!(f & f, f); -+ -+ // BitOr: -+ assert_eq!(t | f, t); -+ assert_eq!(f | t, t); -+ assert_eq!(t | t, t); -+ assert_eq!(f | f, f); -+ -+ // BitXOR: -+ assert_eq!(t ^ f, t); -+ assert_eq!(f ^ t, t); -+ assert_eq!(t ^ t, f); -+ assert_eq!(f ^ f, f); -+ -+ { -+ // AndAssign: -+ let mut v = f; -+ v &= t; -+ assert_eq!(v, f); -+ } -+ { -+ // OrAssign: -+ let mut v = f; -+ v |= t; -+ assert_eq!(v, t); -+ } -+ { -+ // XORAssign: -+ let mut v = f; -+ v ^= t; -+ assert_eq!(v, t); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/ops/vector_neg.rs b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs -new file mode 100644 -index 000000000000..e2d91fd2fed6 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs -@@ -0,0 +1,43 @@ -+//! Vertical (lane-wise) vector `Neg`. -+ -+macro_rules! impl_ops_vector_neg { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl crate::ops::Neg for $id { -+ type Output = Self; -+ #[inline] -+ fn neg(self) -> Self { -+ Self::splat(-1 as $elem_ty) * self -+ } -+ } -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _ops_vector_neg>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn neg() { -+ let z = $id::splat(0 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ let t = $id::splat(2 as $elem_ty); -+ let f = $id::splat(4 as $elem_ty); -+ -+ let nz = $id::splat(-(0 as $elem_ty)); -+ let no = $id::splat(-(1 as $elem_ty)); -+ let nt = $id::splat(-(2 as $elem_ty)); -+ let nf = $id::splat(-(4 as $elem_ty)); -+ -+ assert_eq!(-z, nz); -+ assert_eq!(-o, no); -+ assert_eq!(-t, nt); -+ assert_eq!(-f, nf); -+ -+ assert_eq!(z, -nz); -+ assert_eq!(o, -no); -+ assert_eq!(t, -nt); -+ assert_eq!(f, -nf); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs -new file mode 100644 -index 000000000000..6c794ecf4b93 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs -@@ -0,0 +1,90 @@ -+//! Vertical (lane-wise) vector rotates operations. -+#![allow(unused)] -+ -+macro_rules! impl_ops_vector_rotates { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Shifts the bits of each lane to the left by the specified -+ /// amount in the corresponding lane of `n`, wrapping the -+ /// truncated bits to the end of the resulting integer. -+ /// -+ /// Note: this is neither the same operation as `<<` nor equivalent -+ /// to `slice::rotate_left`. -+ #[inline] -+ pub fn rotate_left(self, n: $id) -> $id { -+ const LANE_WIDTH: $elem_ty = -+ crate::mem::size_of::<$elem_ty>() as $elem_ty * 8; -+ // Protect against undefined behavior for over-long bit shifts -+ let n = n % LANE_WIDTH; -+ (self << n) | (self >> ((LANE_WIDTH - n) % LANE_WIDTH)) -+ } -+ -+ /// Shifts the bits of each lane to the right by the specified -+ /// amount in the corresponding lane of `n`, wrapping the -+ /// truncated bits to the beginning of the resulting integer. 
-+ /// -+ /// Note: this is neither the same operation as `<<` nor equivalent -+ /// to `slice::rotate_left`. -+ #[inline] -+ pub fn rotate_right(self, n: $id) -> $id { -+ const LANE_WIDTH: $elem_ty = -+ crate::mem::size_of::<$elem_ty>() as $elem_ty * 8; -+ // Protect against undefined behavior for over-long bit shifts -+ let n = n % LANE_WIDTH; -+ (self >> n) | (self << ((LANE_WIDTH - n) % LANE_WIDTH)) -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ // FIXME: -+ // https://github.com/rust-lang-nursery/packed_simd/issues/75 -+ #[cfg(not(any( -+ target_arch = "s390x", -+ target_arch = "sparc64", -+ )))] -+ pub mod [<$id _ops_vector_rotate>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn rotate_ops() { -+ let z = $id::splat(0 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ let t = $id::splat(2 as $elem_ty); -+ let f = $id::splat(4 as $elem_ty); -+ -+ let max = $id::splat( -+ (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty); -+ -+ // rotate_right -+ assert_eq!(z.rotate_right(z), z); -+ assert_eq!(z.rotate_right(o), z); -+ assert_eq!(z.rotate_right(t), z); -+ -+ assert_eq!(o.rotate_right(z), o); -+ assert_eq!(t.rotate_right(z), t); -+ assert_eq!(f.rotate_right(z), f); -+ assert_eq!(f.rotate_right(max), f << 1); -+ -+ assert_eq!(o.rotate_right(o), o << max); -+ assert_eq!(t.rotate_right(o), o); -+ assert_eq!(t.rotate_right(t), o << max); -+ assert_eq!(f.rotate_right(o), t); -+ assert_eq!(f.rotate_right(t), o); -+ -+ // rotate_left -+ assert_eq!(z.rotate_left(z), z); -+ assert_eq!(o.rotate_left(z), o); -+ assert_eq!(t.rotate_left(z), t); -+ assert_eq!(f.rotate_left(z), f); -+ assert_eq!(f.rotate_left(max), t); -+ -+ assert_eq!(o.rotate_left(o), t); -+ assert_eq!(o.rotate_left(t), f); -+ assert_eq!(t.rotate_left(o), f); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs 
b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs -new file mode 100644 -index 000000000000..22e1fbc0ec76 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs -@@ -0,0 +1,107 @@ -+//! Vertical (lane-wise) vector-vector shifts operations. -+ -+macro_rules! impl_ops_vector_shifts { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl crate::ops::Shl<$id> for $id { -+ type Output = Self; -+ #[inline] -+ fn shl(self, other: Self) -> Self { -+ use crate::llvm::simd_shl; -+ unsafe { Simd(simd_shl(self.0, other.0)) } -+ } -+ } -+ impl crate::ops::Shr<$id> for $id { -+ type Output = Self; -+ #[inline] -+ fn shr(self, other: Self) -> Self { -+ use crate::llvm::simd_shr; -+ unsafe { Simd(simd_shr(self.0, other.0)) } -+ } -+ } -+ impl crate::ops::ShlAssign<$id> for $id { -+ #[inline] -+ fn shl_assign(&mut self, other: Self) { -+ *self = *self << other; -+ } -+ } -+ impl crate::ops::ShrAssign<$id> for $id { -+ #[inline] -+ fn shr_assign(&mut self, other: Self) { -+ *self = *self >> other; -+ } -+ } -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _ops_vector_shifts>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"), -+ allow(unreachable_code, -+ unused_variables, -+ unused_mut) -+ )] -+ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 -+ fn ops_vector_shifts() { -+ let z = $id::splat(0 as $elem_ty); -+ let o = $id::splat(1 as $elem_ty); -+ let t = $id::splat(2 as $elem_ty); -+ let f = $id::splat(4 as $elem_ty); -+ -+ let max =$id::splat( -+ (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty -+ ); -+ -+ // shr -+ assert_eq!(z >> z, z); -+ assert_eq!(z >> o, z); -+ assert_eq!(z >> t, z); -+ assert_eq!(z >> t, z); -+ -+ #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] { -+ // FIXME: rust produces bad codegen for shifts: -+ // https://github.com/rust-lang-nursery/packed_simd/issues/13 -+ return; -+ } -+ -+ assert_eq!(o >> z, o); -+ assert_eq!(t >> z, t); -+ assert_eq!(f >> z, f); -+ assert_eq!(f >> max, z); -+ -+ assert_eq!(o >> o, z); -+ assert_eq!(t >> o, o); -+ assert_eq!(t >> t, z); -+ assert_eq!(f >> o, t); -+ assert_eq!(f >> t, o); -+ assert_eq!(f >> max, z); -+ -+ // shl -+ assert_eq!(z << z, z); -+ assert_eq!(o << z, o); -+ assert_eq!(t << z, t); -+ assert_eq!(f << z, f); -+ assert_eq!(f << max, z); -+ -+ assert_eq!(o << o, t); -+ assert_eq!(o << t, f); -+ assert_eq!(t << o, f); -+ -+ { -+ // shr_assign -+ let mut v = o; -+ v >>= o; -+ assert_eq!(v, z); -+ } -+ { -+ // shl_assign -+ let mut v = o; -+ v <<= o; -+ assert_eq!(v, t); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/ptr.rs b/third_party/rust/packed_simd/src/api/ptr.rs -new file mode 100644 -index 000000000000..d2e523a49faf ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ptr.rs -@@ -0,0 +1,4 @@ -+//! 
Vector of pointers -+ -+#[macro_use] -+mod gather_scatter; -diff --git a/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs -new file mode 100644 -index 000000000000..9d8e113bb44f ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs -@@ -0,0 +1,241 @@ -+//! Implements masked gather and scatters for vectors of pointers -+ -+macro_rules! impl_ptr_read { -+ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident -+ | $test_tt:tt) => { -+ impl $id -+ where -+ [T; $elem_count]: sealed::SimdArray, -+ { -+ /// Reads selected vector elements from memory. -+ /// -+ /// Instantiates a new vector by reading the values from `self` for -+ /// those lanes whose `mask` is `true`, and using the elements of -+ /// `value` otherwise. -+ /// -+ /// No memory is accessed for those lanes of `self` whose `mask` is -+ /// `false`. -+ /// -+ /// # Safety -+ /// -+ /// This method is unsafe because it dereferences raw pointers. The -+ /// pointers must be aligned to `mem::align_of::()`. -+ #[inline] -+ pub unsafe fn read( -+ self, mask: Simd<[M; $elem_count]>, -+ value: Simd<[T; $elem_count]>, -+ ) -> Simd<[T; $elem_count]> -+ where -+ M: sealed::Mask, -+ [M; $elem_count]: sealed::SimdArray, -+ { -+ use crate::llvm::simd_gather; -+ Simd(simd_gather(value.0, self.0, mask.0)) -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item! 
{ -+ mod [<$id _read>] { -+ use super::*; -+ #[test] -+ fn read() { -+ let mut v = [0_i32; $elem_count]; -+ for i in 0..$elem_count { -+ v[i] = i as i32; -+ } -+ -+ let mut ptr = $id::::null(); -+ -+ for i in 0..$elem_count { -+ ptr = ptr.replace(i, unsafe { -+ crate::mem::transmute(&v[i] as *const i32) -+ }); -+ } -+ -+ // all mask elements are true: -+ let mask = $mask_ty::splat(true); -+ let def = Simd::<[i32; $elem_count]>::splat(42_i32); -+ let r: Simd<[i32; $elem_count]> = unsafe { -+ ptr.read(mask, def) -+ }; -+ assert_eq!( -+ r, -+ Simd::<[i32; $elem_count]>::from_slice_unaligned( -+ &v -+ ) -+ ); -+ -+ let mut mask = mask; -+ for i in 0..$elem_count { -+ if i % 2 != 0 { -+ mask = mask.replace(i, false); -+ } -+ } -+ -+ // even mask elements are true, odd ones are false: -+ let r: Simd<[i32; $elem_count]> = unsafe { -+ ptr.read(mask, def) -+ }; -+ let mut e = v; -+ for i in 0..$elem_count { -+ if i % 2 != 0 { -+ e[i] = 42; -+ } -+ } -+ assert_eq!( -+ r, -+ Simd::<[i32; $elem_count]>::from_slice_unaligned( -+ &e -+ ) -+ ); -+ -+ // all mask elements are false: -+ let mask = $mask_ty::splat(false); -+ let def = Simd::<[i32; $elem_count]>::splat(42_i32); -+ let r: Simd<[i32; $elem_count]> = unsafe { -+ ptr.read(mask, def) } -+ ; -+ assert_eq!(r, def); -+ } -+ } -+ } -+ } -+ }; -+} -+ -+macro_rules! impl_ptr_write { -+ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident -+ | $test_tt:tt) => { -+ impl $id -+ where -+ [T; $elem_count]: sealed::SimdArray, -+ { -+ /// Writes selected vector elements to memory. -+ /// -+ /// Writes the lanes of `values` for which the mask is `true` to -+ /// their corresponding memory addresses in `self`. -+ /// -+ /// No memory is accessed for those lanes of `self` whose `mask` is -+ /// `false`. -+ /// -+ /// Overlapping memory addresses of `self` are written to in order -+ /// from the lest-significant to the most-significant element. 
-+ /// -+ /// # Safety -+ /// -+ /// This method is unsafe because it dereferences raw pointers. The -+ /// pointers must be aligned to `mem::align_of::()`. -+ #[inline] -+ pub unsafe fn write( -+ self, mask: Simd<[M; $elem_count]>, -+ value: Simd<[T; $elem_count]>, -+ ) where -+ M: sealed::Mask, -+ [M; $elem_count]: sealed::SimdArray, -+ { -+ // FIXME: -+ // https://github.com/rust-lang-nursery/packed_simd/issues/85 -+ #[cfg(not(target_arch = "mips"))] -+ { -+ use crate::llvm::simd_scatter; -+ simd_scatter(value.0, self.0, mask.0) -+ } -+ #[cfg(target_arch = "mips")] -+ { -+ let m_ptr = -+ &mask as *const Simd<[M; $elem_count]> as *const M; -+ for i in 0..$elem_count { -+ let m = ptr::read(m_ptr.add(i)); -+ if m.test() { -+ let t_ptr = &self -+ as *const Simd<[*mut T; $elem_count]> -+ as *mut *mut T; -+ let v_ptr = &value as *const Simd<[T; $elem_count]> -+ as *const T; -+ ptr::write( -+ ptr::read(t_ptr.add(i)), -+ ptr::read(v_ptr.add(i)), -+ ); -+ } -+ } -+ } -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item! { -+ mod [<$id _write>] { -+ use super::*; -+ #[test] -+ fn write() { -+ // fourty_two = [42, 42, 42, ...] -+ let fourty_two -+ = Simd::<[i32; $elem_count]>::splat(42_i32); -+ -+ // This test will write to this array -+ let mut arr = [0_i32; $elem_count]; -+ for i in 0..$elem_count { -+ arr[i] = i as i32; -+ } -+ // arr = [0, 1, 2, ...] -+ -+ let mut ptr = $id::::null(); -+ for i in 0..$elem_count { -+ ptr = ptr.replace(i, unsafe { -+ crate::mem::transmute(arr.as_ptr().add(i)) -+ }); -+ } -+ // ptr = [&arr[0], &arr[1], ...] -+ -+ // write `fourty_two` to all elements of `v` -+ { -+ let backup = arr; -+ unsafe { -+ ptr.write($mask_ty::splat(true), fourty_two) -+ }; -+ assert_eq!(arr, [42_i32; $elem_count]); -+ arr = backup; // arr = [0, 1, 2, ...] 
-+ } -+ -+ // write 42 to even elements of arr: -+ { -+ // set odd elements of the mask to false -+ let mut mask = $mask_ty::splat(true); -+ for i in 0..$elem_count { -+ if i % 2 != 0 { -+ mask = mask.replace(i, false); -+ } -+ } -+ // mask = [true, false, true, false, ...] -+ -+ // expected result r = [42, 1, 42, 3, 42, 5, ...] -+ let mut r = arr; -+ for i in 0..$elem_count { -+ if i % 2 == 0 { -+ r[i] = 42; -+ } -+ } -+ -+ let backup = arr; -+ unsafe { ptr.write(mask, fourty_two) }; -+ assert_eq!(arr, r); -+ arr = backup; // arr = [0, 1, 2, 3, ...] -+ } -+ -+ // write 42 to no elements of arr -+ { -+ let backup = arr; -+ unsafe { -+ ptr.write($mask_ty::splat(false), fourty_two) -+ }; -+ assert_eq!(arr, backup); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/reductions.rs b/third_party/rust/packed_simd/src/api/reductions.rs -new file mode 100644 -index 000000000000..54d2f0cc7f08 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/reductions.rs -@@ -0,0 +1,12 @@ -+//! Reductions -+ -+#[macro_use] -+mod float_arithmetic; -+#[macro_use] -+mod integer_arithmetic; -+#[macro_use] -+mod bitwise; -+#[macro_use] -+mod mask; -+#[macro_use] -+mod min_max; -diff --git a/third_party/rust/packed_simd/src/api/reductions/bitwise.rs b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs -new file mode 100644 -index 000000000000..5bad4f474b16 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs -@@ -0,0 +1,151 @@ -+//! Implements portable horizontal bitwise vector reductions. -+#![allow(unused)] -+ -+macro_rules! impl_reduction_bitwise { -+ ( -+ [$elem_ty:ident; $elem_count:expr]: -+ $id:ident | $ielem_ty:ident | $test_tt:tt | -+ ($convert:expr) | -+ ($true:expr, $false:expr) -+ ) => { -+ impl $id { -+ /// Lane-wise bitwise `and` of the vector elements. -+ /// -+ /// Note: if the vector has one lane, the first element of the -+ /// vector is returned. 
-+ #[inline] -+ pub fn and(self) -> $elem_ty { -+ #[cfg(not(target_arch = "aarch64"))] -+ { -+ use crate::llvm::simd_reduce_and; -+ let r: $ielem_ty = unsafe { simd_reduce_and(self.0) }; -+ $convert(r) -+ } -+ #[cfg(target_arch = "aarch64")] -+ { -+ // FIXME: broken on aarch64 -+ // https://github.com/rust-lang-nursery/packed_simd/issues/15 -+ let mut x = self.extract(0) as $elem_ty; -+ for i in 1..$id::lanes() { -+ x &= self.extract(i) as $elem_ty; -+ } -+ x -+ } -+ } -+ -+ /// Lane-wise bitwise `or` of the vector elements. -+ /// -+ /// Note: if the vector has one lane, the first element of the -+ /// vector is returned. -+ #[inline] -+ pub fn or(self) -> $elem_ty { -+ #[cfg(not(target_arch = "aarch64"))] -+ { -+ use crate::llvm::simd_reduce_or; -+ let r: $ielem_ty = unsafe { simd_reduce_or(self.0) }; -+ $convert(r) -+ } -+ #[cfg(target_arch = "aarch64")] -+ { -+ // FIXME: broken on aarch64 -+ // https://github.com/rust-lang-nursery/packed_simd/issues/15 -+ let mut x = self.extract(0) as $elem_ty; -+ for i in 1..$id::lanes() { -+ x |= self.extract(i) as $elem_ty; -+ } -+ x -+ } -+ } -+ -+ /// Lane-wise bitwise `xor` of the vector elements. -+ /// -+ /// Note: if the vector has one lane, the first element of the -+ /// vector is returned. -+ #[inline] -+ pub fn xor(self) -> $elem_ty { -+ #[cfg(not(target_arch = "aarch64"))] -+ { -+ use crate::llvm::simd_reduce_xor; -+ let r: $ielem_ty = unsafe { simd_reduce_xor(self.0) }; -+ $convert(r) -+ } -+ #[cfg(target_arch = "aarch64")] -+ { -+ // FIXME: broken on aarch64 -+ // https://github.com/rust-lang-nursery/packed_simd/issues/15 -+ let mut x = self.extract(0) as $elem_ty; -+ for i in 1..$id::lanes() { -+ x ^= self.extract(i) as $elem_ty; -+ } -+ x -+ } -+ } -+ } -+ -+ test_if!{ -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _reduction_bitwise>] { -+ use super::*; -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn and() { -+ let v = $id::splat($false); -+ assert_eq!(v.and(), $false); -+ let v = $id::splat($true); -+ assert_eq!(v.and(), $true); -+ let v = $id::splat($false); -+ let v = v.replace(0, $true); -+ if $id::lanes() > 1 { -+ assert_eq!(v.and(), $false); -+ } else { -+ assert_eq!(v.and(), $true); -+ } -+ let v = $id::splat($true); -+ let v = v.replace(0, $false); -+ assert_eq!(v.and(), $false); -+ -+ } -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn or() { -+ let v = $id::splat($false); -+ assert_eq!(v.or(), $false); -+ let v = $id::splat($true); -+ assert_eq!(v.or(), $true); -+ let v = $id::splat($false); -+ let v = v.replace(0, $true); -+ assert_eq!(v.or(), $true); -+ let v = $id::splat($true); -+ let v = v.replace(0, $false); -+ if $id::lanes() > 1 { -+ assert_eq!(v.or(), $true); -+ } else { -+ assert_eq!(v.or(), $false); -+ } -+ } -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn xor() { -+ let v = $id::splat($false); -+ assert_eq!(v.xor(), $false); -+ let v = $id::splat($true); -+ if $id::lanes() > 1 { -+ assert_eq!(v.xor(), $false); -+ } else { -+ assert_eq!(v.xor(), $true); -+ } -+ let v = $id::splat($false); -+ let v = v.replace(0, $true); -+ assert_eq!(v.xor(), $true); -+ let v = $id::splat($true); -+ let v = v.replace(0, $false); -+ if $id::lanes() > 1 { -+ assert_eq!(v.xor(), $true); -+ } else { -+ assert_eq!(v.xor(), $false); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs -new file mode 100644 -index 000000000000..dd722ae25fdd ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs -@@ -0,0 
+1,312 @@ -+//! Implements portable horizontal float vector arithmetic reductions. -+ -+macro_rules! impl_reduction_float_arithmetic { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Horizontal sum of the vector elements. -+ /// -+ /// The intrinsic performs a tree-reduction of the vector elements. -+ /// That is, for an 8 element vector: -+ /// -+ /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) -+ /// -+ /// If one of the vector element is `NaN` the reduction returns -+ /// `NaN`. The resulting `NaN` is not required to be equal to any -+ /// of the `NaN`s in the vector. -+ #[inline] -+ pub fn sum(self) -> $elem_ty { -+ #[cfg(not(target_arch = "aarch64"))] -+ { -+ use crate::llvm::simd_reduce_add_ordered; -+ unsafe { simd_reduce_add_ordered(self.0, 0 as $elem_ty) } -+ } -+ #[cfg(target_arch = "aarch64")] -+ { -+ // FIXME: broken on AArch64 -+ // https://github.com/rust-lang-nursery/packed_simd/issues/15 -+ let mut x = self.extract(0) as $elem_ty; -+ for i in 1..$id::lanes() { -+ x += self.extract(i) as $elem_ty; -+ } -+ x -+ } -+ } -+ -+ /// Horizontal product of the vector elements. -+ /// -+ /// The intrinsic performs a tree-reduction of the vector elements. -+ /// That is, for an 8 element vector: -+ /// -+ /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) -+ /// -+ /// If one of the vector element is `NaN` the reduction returns -+ /// `NaN`. The resulting `NaN` is not required to be equal to any -+ /// of the `NaN`s in the vector. 
-+ #[inline] -+ pub fn product(self) -> $elem_ty { -+ #[cfg(not(target_arch = "aarch64"))] -+ { -+ use crate::llvm::simd_reduce_mul_ordered; -+ unsafe { simd_reduce_mul_ordered(self.0, 1 as $elem_ty) } -+ } -+ #[cfg(target_arch = "aarch64")] -+ { -+ // FIXME: broken on AArch64 -+ // https://github.com/rust-lang-nursery/packed_simd/issues/15 -+ let mut x = self.extract(0) as $elem_ty; -+ for i in 1..$id::lanes() { -+ x *= self.extract(i) as $elem_ty; -+ } -+ x -+ } -+ } -+ } -+ -+ impl crate::iter::Sum for $id { -+ #[inline] -+ fn sum>(iter: I) -> $id { -+ iter.fold($id::splat(0.), crate::ops::Add::add) -+ } -+ } -+ -+ impl crate::iter::Product for $id { -+ #[inline] -+ fn product>(iter: I) -> $id { -+ iter.fold($id::splat(1.), crate::ops::Mul::mul) -+ } -+ } -+ -+ impl<'a> crate::iter::Sum<&'a $id> for $id { -+ #[inline] -+ fn sum>(iter: I) -> $id { -+ iter.fold($id::splat(0.), |a, b| crate::ops::Add::add(a, *b)) -+ } -+ } -+ -+ impl<'a> crate::iter::Product<&'a $id> for $id { -+ #[inline] -+ fn product>(iter: I) -> $id { -+ iter.fold($id::splat(1.), |a, b| crate::ops::Mul::mul(a, *b)) -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _reduction_float_arith>] { -+ use super::*; -+ fn alternating(x: usize) -> $id { -+ let mut v = $id::splat(1 as $elem_ty); -+ for i in 0..$id::lanes() { -+ if i % x == 0 { -+ v = v.replace(i, 2 as $elem_ty); -+ } -+ } -+ v -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn sum() { -+ let v = $id::splat(0 as $elem_ty); -+ assert_eq!(v.sum(), 0 as $elem_ty); -+ let v = $id::splat(1 as $elem_ty); -+ assert_eq!(v.sum(), $id::lanes() as $elem_ty); -+ let v = alternating(2); -+ assert_eq!( -+ v.sum(), -+ ($id::lanes() / 2 + $id::lanes()) as $elem_ty -+ ); -+ } -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn product() { -+ let v = $id::splat(0 as $elem_ty); -+ assert_eq!(v.product(), 0 as $elem_ty); -+ let v = $id::splat(1 as $elem_ty); -+ assert_eq!(v.product(), 1 as $elem_ty); -+ let f = match $id::lanes() { -+ 64 => 16, -+ 32 => 8, -+ 16 => 4, -+ _ => 2, -+ }; -+ let v = alternating(f); -+ assert_eq!( -+ v.product(), -+ (2_usize.pow(($id::lanes() / f) as u32) -+ as $elem_ty) -+ ); -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[allow(unreachable_code)] -+ #[allow(unused_mut)] -+ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 -+ fn sum_nan() { -+ // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 -+ // https://github.com/rust-lang-nursery/packed_simd/issues/6 -+ return; -+ -+ let n0 = crate::$elem_ty::NAN; -+ let v0 = $id::splat(-3.0); -+ for i in 0..$id::lanes() { -+ let mut v = v0.replace(i, n0); -+ // If the vector contains a NaN the result is NaN: -+ assert!( -+ v.sum().is_nan(), -+ "nan at {} => {} | {:?}", -+ i, -+ v.sum(), -+ v -+ ); -+ for j in 0..i { -+ v = v.replace(j, n0); -+ assert!(v.sum().is_nan()); -+ } -+ } -+ let v = $id::splat(n0); -+ assert!(v.sum().is_nan(), "all nans | {:?}", v); -+ } -+ -+ 
#[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[allow(unreachable_code)] -+ #[allow(unused_mut)] -+ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 -+ fn product_nan() { -+ // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 -+ // https://github.com/rust-lang-nursery/packed_simd/issues/6 -+ return; -+ -+ let n0 = crate::$elem_ty::NAN; -+ let v0 = $id::splat(-3.0); -+ for i in 0..$id::lanes() { -+ let mut v = v0.replace(i, n0); -+ // If the vector contains a NaN the result is NaN: -+ assert!( -+ v.product().is_nan(), -+ "nan at {} => {} | {:?}", -+ i, -+ v.product(), -+ v -+ ); -+ for j in 0..i { -+ v = v.replace(j, n0); -+ assert!(v.product().is_nan()); -+ } -+ } -+ let v = $id::splat(n0); -+ assert!(v.product().is_nan(), "all nans | {:?}", v); -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[allow(unused, dead_code)] -+ fn sum_roundoff() { -+ // Performs a tree-reduction -+ fn tree_reduce_sum(a: &[$elem_ty]) -> $elem_ty { -+ assert!(!a.is_empty()); -+ if a.len() == 1 { -+ a[0] -+ } else if a.len() == 2 { -+ a[0] + a[1] -+ } else { -+ let mid = a.len() / 2; -+ let (left, right) = a.split_at(mid); -+ tree_reduce_sum(left) + tree_reduce_sum(right) -+ } -+ } -+ -+ let mut start = crate::$elem_ty::EPSILON; -+ let mut scalar_reduction = 0. as $elem_ty; -+ -+ let mut v = $id::splat(0. as $elem_ty); -+ for i in 0..$id::lanes() { -+ let c = if i % 2 == 0 { 1e3 } else { -1. }; -+ start *= 3.14 * c; -+ scalar_reduction += start; -+ v = v.replace(i, start); -+ } -+ let simd_reduction = v.sum(); -+ -+ let mut a = [0. 
as $elem_ty; $id::lanes()]; -+ v.write_to_slice_unaligned(&mut a); -+ let tree_reduction = tree_reduce_sum(&a); -+ -+ // tolerate 1 ULP difference: -+ let red_bits = simd_reduction.to_bits(); -+ let tree_bits = tree_reduction.to_bits(); -+ assert!( -+ if red_bits > tree_bits { -+ red_bits - tree_bits -+ } else { -+ tree_bits - red_bits -+ } < 2, -+ "vector: {:?} | simd_reduction: {:?} | \ -+ tree_reduction: {} | scalar_reduction: {}", -+ v, -+ simd_reduction, -+ tree_reduction, -+ scalar_reduction -+ ); -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[allow(unused, dead_code)] -+ fn product_roundoff() { -+ // Performs a tree-reduction -+ fn tree_reduce_product(a: &[$elem_ty]) -> $elem_ty { -+ assert!(!a.is_empty()); -+ if a.len() == 1 { -+ a[0] -+ } else if a.len() == 2 { -+ a[0] * a[1] -+ } else { -+ let mid = a.len() / 2; -+ let (left, right) = a.split_at(mid); -+ tree_reduce_product(left) -+ * tree_reduce_product(right) -+ } -+ } -+ -+ let mut start = crate::$elem_ty::EPSILON; -+ let mut scalar_reduction = 1. as $elem_ty; -+ -+ let mut v = $id::splat(0. as $elem_ty); -+ for i in 0..$id::lanes() { -+ let c = if i % 2 == 0 { 1e3 } else { -1. }; -+ start *= 3.14 * c; -+ scalar_reduction *= start; -+ v = v.replace(i, start); -+ } -+ let simd_reduction = v.product(); -+ -+ let mut a = [0. 
as $elem_ty; $id::lanes()]; -+ v.write_to_slice_unaligned(&mut a); -+ let tree_reduction = tree_reduce_product(&a); -+ -+ // tolerate 1 ULP difference: -+ let red_bits = simd_reduction.to_bits(); -+ let tree_bits = tree_reduction.to_bits(); -+ assert!( -+ if red_bits > tree_bits { -+ red_bits - tree_bits -+ } else { -+ tree_bits - red_bits -+ } < 2, -+ "vector: {:?} | simd_reduction: {:?} | \ -+ tree_reduction: {} | scalar_reduction: {}", -+ v, -+ simd_reduction, -+ tree_reduction, -+ scalar_reduction -+ ); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs -new file mode 100644 -index 000000000000..91dffad31032 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs -@@ -0,0 +1,197 @@ -+//! Implements portable horizontal integer vector arithmetic reductions. -+ -+macro_rules! impl_reduction_integer_arithmetic { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident -+ | $test_tt:tt) => { -+ impl $id { -+ /// Horizontal wrapping sum of the vector elements. -+ /// -+ /// The intrinsic performs a tree-reduction of the vector elements. -+ /// That is, for an 8 element vector: -+ /// -+ /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) -+ /// -+ /// If an operation overflows it returns the mathematical result -+ /// modulo `2^n` where `n` is the number of times it overflows. 
-+ #[inline] -+ pub fn wrapping_sum(self) -> $elem_ty { -+ #[cfg(not(target_arch = "aarch64"))] -+ { -+ use crate::llvm::simd_reduce_add_ordered; -+ let v: $ielem_ty = unsafe { -+ simd_reduce_add_ordered(self.0, 0 as $ielem_ty) -+ }; -+ v as $elem_ty -+ } -+ #[cfg(target_arch = "aarch64")] -+ { -+ // FIXME: broken on AArch64 -+ // https://github.com/rust-lang-nursery/packed_simd/issues/15 -+ let mut x = self.extract(0) as $elem_ty; -+ for i in 1..$id::lanes() { -+ x = x.wrapping_add(self.extract(i) as $elem_ty); -+ } -+ x -+ } -+ } -+ -+ /// Horizontal wrapping product of the vector elements. -+ /// -+ /// The intrinsic performs a tree-reduction of the vector elements. -+ /// That is, for an 8 element vector: -+ /// -+ /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) -+ /// -+ /// If an operation overflows it returns the mathematical result -+ /// modulo `2^n` where `n` is the number of times it overflows. -+ #[inline] -+ pub fn wrapping_product(self) -> $elem_ty { -+ #[cfg(not(target_arch = "aarch64"))] -+ { -+ use crate::llvm::simd_reduce_mul_ordered; -+ let v: $ielem_ty = unsafe { -+ simd_reduce_mul_ordered(self.0, 1 as $ielem_ty) -+ }; -+ v as $elem_ty -+ } -+ #[cfg(target_arch = "aarch64")] -+ { -+ // FIXME: broken on AArch64 -+ // https://github.com/rust-lang-nursery/packed_simd/issues/15 -+ let mut x = self.extract(0) as $elem_ty; -+ for i in 1..$id::lanes() { -+ x = x.wrapping_mul(self.extract(i) as $elem_ty); -+ } -+ x -+ } -+ } -+ } -+ -+ impl crate::iter::Sum for $id { -+ #[inline] -+ fn sum>(iter: I) -> $id { -+ iter.fold($id::splat(0), crate::ops::Add::add) -+ } -+ } -+ -+ impl crate::iter::Product for $id { -+ #[inline] -+ fn product>(iter: I) -> $id { -+ iter.fold($id::splat(1), crate::ops::Mul::mul) -+ } -+ } -+ -+ impl<'a> crate::iter::Sum<&'a $id> for $id { -+ #[inline] -+ fn sum>(iter: I) -> $id { -+ iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b)) -+ } -+ } -+ -+ impl<'a> crate::iter::Product<&'a $id> for $id { -+ #[inline] -+ 
fn product>(iter: I) -> $id { -+ iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b)) -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _reduction_int_arith>] { -+ use super::*; -+ -+ fn alternating(x: usize) -> $id { -+ let mut v = $id::splat(1 as $elem_ty); -+ for i in 0..$id::lanes() { -+ if i % x == 0 { -+ v = v.replace(i, 2 as $elem_ty); -+ } -+ } -+ v -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn wrapping_sum() { -+ let v = $id::splat(0 as $elem_ty); -+ assert_eq!(v.wrapping_sum(), 0 as $elem_ty); -+ let v = $id::splat(1 as $elem_ty); -+ assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty); -+ let v = alternating(2); -+ if $id::lanes() > 1 { -+ assert_eq!( -+ v.wrapping_sum(), -+ ($id::lanes() / 2 + $id::lanes()) as $elem_ty -+ ); -+ } else { -+ assert_eq!( -+ v.wrapping_sum(), -+ 2 as $elem_ty -+ ); -+ } -+ } -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn wrapping_sum_overflow() { -+ let start = $elem_ty::max_value() -+ - ($id::lanes() as $elem_ty / 2); -+ -+ let v = $id::splat(start as $elem_ty); -+ let vwrapping_sum = v.wrapping_sum(); -+ -+ let mut wrapping_sum = start; -+ for _ in 1..$id::lanes() { -+ wrapping_sum = wrapping_sum.wrapping_add(start); -+ } -+ assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v); -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn wrapping_product() { -+ let v = $id::splat(0 as $elem_ty); -+ assert_eq!(v.wrapping_product(), 0 as $elem_ty); -+ let v = $id::splat(1 as $elem_ty); -+ assert_eq!(v.wrapping_product(), 1 as $elem_ty); -+ let f = match $id::lanes() { -+ 64 => 16, -+ 32 => 8, -+ 16 => 4, -+ _ => 2, -+ }; -+ let v = alternating(f); -+ if $id::lanes() > 1 { -+ assert_eq!( -+ v.wrapping_product(), -+ (2_usize.pow(($id::lanes() / f) as u32) -+ as $elem_ty) -+ ); -+ } else { 
-+ assert_eq!( -+ v.wrapping_product(), -+ 2 as $elem_ty -+ ); -+ } -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn wrapping_product_overflow() { -+ let start = $elem_ty::max_value() -+ - ($id::lanes() as $elem_ty / 2); -+ -+ let v = $id::splat(start as $elem_ty); -+ let vmul = v.wrapping_product(); -+ -+ let mut mul = start; -+ for _ in 1..$id::lanes() { -+ mul = mul.wrapping_mul(start); -+ } -+ assert_eq!(mul, vmul, "v = {:?}", v); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/reductions/mask.rs b/third_party/rust/packed_simd/src/api/reductions/mask.rs -new file mode 100644 -index 000000000000..0dd6a84e7e8d ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/reductions/mask.rs -@@ -0,0 +1,89 @@ -+//! Implements portable horizontal mask reductions. -+ -+macro_rules! impl_reduction_mask { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Are `all` vector lanes `true`? -+ #[inline] -+ pub fn all(self) -> bool { -+ unsafe { crate::codegen::reductions::mask::All::all(self) } -+ } -+ /// Is `any` vector lane `true`? -+ #[inline] -+ pub fn any(self) -> bool { -+ unsafe { crate::codegen::reductions::mask::Any::any(self) } -+ } -+ /// Are `all` vector lanes `false`? -+ #[inline] -+ pub fn none(self) -> bool { -+ !self.any() -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _reduction>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn all() { -+ let a = $id::splat(true); -+ assert!(a.all()); -+ let a = $id::splat(false); -+ assert!(!a.all()); -+ -+ if $id::lanes() > 1 { -+ for i in 0..$id::lanes() { -+ let mut a = $id::splat(true); -+ a = a.replace(i, false); -+ assert!(!a.all()); -+ let mut a = $id::splat(false); -+ a = a.replace(i, true); -+ assert!(!a.all()); -+ } -+ } -+ } -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn any() { -+ let a = $id::splat(true); -+ assert!(a.any()); -+ let a = $id::splat(false); -+ assert!(!a.any()); -+ -+ if $id::lanes() > 1 { -+ for i in 0..$id::lanes() { -+ let mut a = $id::splat(true); -+ a = a.replace(i, false); -+ assert!(a.any()); -+ let mut a = $id::splat(false); -+ a = a.replace(i, true); -+ assert!(a.any()); -+ } -+ } -+ } -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn none() { -+ let a = $id::splat(true); -+ assert!(!a.none()); -+ let a = $id::splat(false); -+ assert!(a.none()); -+ -+ if $id::lanes() > 1 { -+ for i in 0..$id::lanes() { -+ let mut a = $id::splat(true); -+ a = a.replace(i, false); -+ assert!(!a.none()); -+ let mut a = $id::splat(false); -+ a = a.replace(i, true); -+ assert!(!a.none()); -+ } -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/reductions/min_max.rs b/third_party/rust/packed_simd/src/api/reductions/min_max.rs -new file mode 100644 -index 000000000000..c4d3aa10f15c ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/reductions/min_max.rs -@@ -0,0 +1,377 @@ -+//! Implements portable horizontal vector min/max reductions. -+ -+macro_rules! 
impl_reduction_min_max { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident -+ | $ielem_ty:ident | $test_tt:tt) => { -+ impl $id { -+ /// Largest vector element value. -+ #[inline] -+ pub fn max_element(self) -> $elem_ty { -+ #[cfg(not(any( -+ target_arch = "aarch64", -+ target_arch = "arm", -+ target_arch = "powerpc64", -+ target_arch = "wasm32", -+ )))] -+ { -+ use crate::llvm::simd_reduce_max; -+ let v: $ielem_ty = unsafe { simd_reduce_max(self.0) }; -+ v as $elem_ty -+ } -+ #[cfg(any( -+ target_arch = "aarch64", -+ target_arch = "arm", -+ target_arch = "powerpc64", -+ target_arch = "wasm32", -+ ))] -+ { -+ // FIXME: broken on AArch64 -+ // https://github.com/rust-lang-nursery/packed_simd/issues/15 -+ // FIXME: broken on WASM32 -+ // https://github.com/rust-lang-nursery/packed_simd/issues/91 -+ let mut x = self.extract(0); -+ for i in 1..$id::lanes() { -+ x = x.max(self.extract(i)); -+ } -+ x -+ } -+ } -+ -+ /// Smallest vector element value. -+ #[inline] -+ pub fn min_element(self) -> $elem_ty { -+ #[cfg(not(any( -+ target_arch = "aarch64", -+ target_arch = "arm", -+ all(target_arch = "x86", not(target_feature = "sse2")), -+ target_arch = "powerpc64", -+ target_arch = "wasm32", -+ ),))] -+ { -+ use crate::llvm::simd_reduce_min; -+ let v: $ielem_ty = unsafe { simd_reduce_min(self.0) }; -+ v as $elem_ty -+ } -+ #[cfg(any( -+ target_arch = "aarch64", -+ target_arch = "arm", -+ all(target_arch = "x86", not(target_feature = "sse2")), -+ target_arch = "powerpc64", -+ target_arch = "wasm32", -+ ))] -+ { -+ // FIXME: broken on AArch64 -+ // https://github.com/rust-lang-nursery/packed_simd/issues/15 -+ // FIXME: broken on i586-unknown-linux-gnu -+ // https://github.com/rust-lang-nursery/packed_simd/issues/22 -+ // FIXME: broken on WASM32 -+ // https://github.com/rust-lang-nursery/packed_simd/issues/91 -+ let mut x = self.extract(0); -+ for i in 1..$id::lanes() { -+ x = x.min(self.extract(i)); -+ } -+ x -+ } -+ } -+ } -+ test_if! {$test_tt: -+ paste::item! 
{ -+ pub mod [<$id _reduction_min_max>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ pub fn max_element() { -+ let v = $id::splat(0 as $elem_ty); -+ assert_eq!(v.max_element(), 0 as $elem_ty); -+ if $id::lanes() > 1 { -+ let v = v.replace(1, 1 as $elem_ty); -+ assert_eq!(v.max_element(), 1 as $elem_ty); -+ } -+ let v = v.replace(0, 2 as $elem_ty); -+ assert_eq!(v.max_element(), 2 as $elem_ty); -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ pub fn min_element() { -+ let v = $id::splat(0 as $elem_ty); -+ assert_eq!(v.min_element(), 0 as $elem_ty); -+ if $id::lanes() > 1 { -+ let v = v.replace(1, 1 as $elem_ty); -+ assert_eq!(v.min_element(), 0 as $elem_ty); -+ } -+ let v = $id::splat(1 as $elem_ty); -+ let v = v.replace(0, 2 as $elem_ty); -+ if $id::lanes() > 1 { -+ assert_eq!(v.min_element(), 1 as $elem_ty); -+ } else { -+ assert_eq!(v.min_element(), 2 as $elem_ty); -+ } -+ if $id::lanes() > 1 { -+ let v = $id::splat(2 as $elem_ty); -+ let v = v.replace(1, 1 as $elem_ty); -+ assert_eq!(v.min_element(), 1 as $elem_ty); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -+ -+macro_rules! test_reduction_float_min_max { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ test_if!{ -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _reduction_min_max_nan>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn min_element_test() { -+ let n = crate::$elem_ty::NAN; -+ -+ assert_eq!(n.min(-3.), -3.); -+ assert_eq!((-3. 
as $elem_ty).min(n), -3.); -+ -+ let v0 = $id::splat(-3.); -+ -+ let target_with_broken_last_lane_nan = !cfg!(any( -+ target_arch = "arm", target_arch = "aarch64", -+ all(target_arch = "x86", -+ not(target_feature = "sse2") -+ ), -+ target_arch = "powerpc64", -+ target_arch = "wasm32", -+ )); -+ -+ // The vector is initialized to `-3.`s: [-3, -3, -3, -3] -+ for i in 0..$id::lanes() { -+ // We replace the i-th element of the vector with -+ // `NaN`: [-3, -3, -3, NaN] -+ let mut v = v0.replace(i, n); -+ -+ // If the NaN is in the last place, the LLVM -+ // implementation of these methods is broken on some -+ // targets: -+ if i == $id::lanes() - 1 && -+ target_with_broken_last_lane_nan { -+ // FIXME: -+ // https://github.com/rust-lang-nursery/packed_simd/issues/5 -+ // -+ // If there is a NaN, the result should always -+ // the smallest element, but currently when the -+ // last element is NaN the current -+ // implementation incorrectly returns NaN. -+ // -+ // The targets mentioned above use different -+ // codegen that produces the correct result. -+ // -+ // These asserts detect if this behavior changes -+ assert!(v.min_element().is_nan(), -+ // FIXME: ^^^ should be -3. -+ "[A]: nan at {} => {} | {:?}", -+ i, v.min_element(), v); -+ -+ // If we replace all the elements in the vector -+ // up-to the `i-th` lane with `NaN`s, the result -+ // is still always `-3.` unless all elements of -+ // the vector are `NaN`s: -+ // -+ // This is also broken: -+ for j in 0..i { -+ v = v.replace(j, n); -+ assert!(v.min_element().is_nan(), -+ // FIXME: ^^^ should be -3. -+ "[B]: nan at {} => {} | {:?}", -+ i, v.min_element(), v); -+ } -+ -+ // We are done here, since we were in the last -+ // lane which is the last iteration of the loop. -+ break -+ } -+ -+ // We are not in the last lane, and there is only -+ // one `NaN` in the vector. 
-+ -+ // If the vector has one lane, the result is `NaN`: -+ if $id::lanes() == 1 { -+ assert!(v.min_element().is_nan(), -+ "[C]: all nans | v={:?} | min={} | \ -+ is_nan: {}", -+ v, v.min_element(), -+ v.min_element().is_nan() -+ ); -+ -+ // And we are done, since the vector only has -+ // one lane anyways. -+ break; -+ } -+ -+ // The vector has more than one lane, since there is -+ // only one `NaN` in the vector, the result is -+ // always `-3`. -+ assert_eq!(v.min_element(), -3., -+ "[D]: nan at {} => {} | {:?}", -+ i, v.min_element(), v); -+ -+ // If we replace all the elements in the vector -+ // up-to the `i-th` lane with `NaN`s, the result is -+ // still always `-3.` unless all elements of the -+ // vector are `NaN`s: -+ for j in 0..i { -+ v = v.replace(j, n); -+ -+ if i == $id::lanes() - 1 && j == i - 1 { -+ // All elements of the vector are `NaN`s, -+ // therefore the result is NaN as well. -+ // -+ // Note: the #lanes of the vector is > 1, so -+ // "i - 1" does not overflow. -+ assert!(v.min_element().is_nan(), -+ "[E]: all nans | v={:?} | min={} | \ -+ is_nan: {}", -+ v, v.min_element(), -+ v.min_element().is_nan()); -+ } else { -+ // There are non-`NaN` elements in the -+ // vector, therefore the result is `-3.`: -+ assert_eq!(v.min_element(), -3., -+ "[F]: nan at {} => {} | {:?}", -+ i, v.min_element(), v); -+ } -+ } -+ } -+ -+ // If the vector contains all NaNs the result is NaN: -+ assert!($id::splat(n).min_element().is_nan(), -+ "all nans | v={:?} | min={} | is_nan: {}", -+ $id::splat(n), $id::splat(n).min_element(), -+ $id::splat(n).min_element().is_nan()); -+ } -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn max_element_test() { -+ let n = crate::$elem_ty::NAN; -+ -+ assert_eq!(n.max(-3.), -3.); -+ assert_eq!((-3. 
as $elem_ty).max(n), -3.); -+ -+ let v0 = $id::splat(-3.); -+ -+ let target_with_broken_last_lane_nan = !cfg!(any( -+ target_arch = "arm", target_arch = "aarch64", -+ target_arch = "powerpc64", target_arch = "wasm32", -+ )); -+ -+ // The vector is initialized to `-3.`s: [-3, -3, -3, -3] -+ for i in 0..$id::lanes() { -+ // We replace the i-th element of the vector with -+ // `NaN`: [-3, -3, -3, NaN] -+ let mut v = v0.replace(i, n); -+ -+ // If the NaN is in the last place, the LLVM -+ // implementation of these methods is broken on some -+ // targets: -+ if i == $id::lanes() - 1 && -+ target_with_broken_last_lane_nan { -+ // FIXME: -+ // https://github.com/rust-lang-nursery/packed_simd/issues/5 -+ // -+ // If there is a NaN, the result should -+ // always the largest element, but currently -+ // when the last element is NaN the current -+ // implementation incorrectly returns NaN. -+ // -+ // The targets mentioned above use different -+ // codegen that produces the correct result. -+ // -+ // These asserts detect if this behavior -+ // changes -+ assert!(v.max_element().is_nan(), -+ // FIXME: ^^^ should be -3. -+ "[A]: nan at {} => {} | {:?}", -+ i, v.max_element(), v); -+ -+ // If we replace all the elements in the vector -+ // up-to the `i-th` lane with `NaN`s, the result -+ // is still always `-3.` unless all elements of -+ // the vector are `NaN`s: -+ // -+ // This is also broken: -+ for j in 0..i { -+ v = v.replace(j, n); -+ assert!(v.max_element().is_nan(), -+ // FIXME: ^^^ should be -3. -+ "[B]: nan at {} => {} | {:?}", -+ i, v.max_element(), v); -+ } -+ -+ // We are done here, since we were in the last -+ // lane which is the last iteration of the loop. -+ break -+ } -+ -+ // We are not in the last lane, and there is only -+ // one `NaN` in the vector. 
-+ -+ // If the vector has one lane, the result is `NaN`: -+ if $id::lanes() == 1 { -+ assert!(v.max_element().is_nan(), -+ "[C]: all nans | v={:?} | min={} | \ -+ is_nan: {}", -+ v, v.max_element(), -+ v.max_element().is_nan()); -+ -+ // And we are done, since the vector only has -+ // one lane anyways. -+ break; -+ } -+ -+ // The vector has more than one lane, since there is -+ // only one `NaN` in the vector, the result is -+ // always `-3`. -+ assert_eq!(v.max_element(), -3., -+ "[D]: nan at {} => {} | {:?}", -+ i, v.max_element(), v); -+ -+ // If we replace all the elements in the vector -+ // up-to the `i-th` lane with `NaN`s, the result is -+ // still always `-3.` unless all elements of the -+ // vector are `NaN`s: -+ for j in 0..i { -+ v = v.replace(j, n); -+ -+ if i == $id::lanes() - 1 && j == i - 1 { -+ // All elements of the vector are `NaN`s, -+ // therefore the result is NaN as well. -+ // -+ // Note: the #lanes of the vector is > 1, so -+ // "i - 1" does not overflow. -+ assert!(v.max_element().is_nan(), -+ "[E]: all nans | v={:?} | max={} | \ -+ is_nan: {}", -+ v, v.max_element(), -+ v.max_element().is_nan()); -+ } else { -+ // There are non-`NaN` elements in the -+ // vector, therefore the result is `-3.`: -+ assert_eq!(v.max_element(), -3., -+ "[F]: nan at {} => {} | {:?}", -+ i, v.max_element(), v); -+ } -+ } -+ } -+ -+ // If the vector contains all NaNs the result is NaN: -+ assert!($id::splat(n).max_element().is_nan(), -+ "all nans | v={:?} | max={} | is_nan: {}", -+ $id::splat(n), $id::splat(n).max_element(), -+ $id::splat(n).max_element().is_nan()); -+ } -+ } -+ } -+ } -+ } -+} -diff --git a/third_party/rust/packed_simd/src/api/select.rs b/third_party/rust/packed_simd/src/api/select.rs -new file mode 100644 -index 000000000000..24525df56c73 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/select.rs -@@ -0,0 +1,75 @@ -+//! Implements mask's `select`. -+ -+/// Implements mask select method -+macro_rules! 
impl_select { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Selects elements of `a` and `b` using mask. -+ /// -+ /// The lanes of the result for which the mask is `true` contain -+ /// the values of `a`. The remaining lanes contain the values of -+ /// `b`. -+ #[inline] -+ pub fn select(self, a: Simd, b: Simd) -> Simd -+ where -+ T: sealed::SimdArray< -+ NT = <[$elem_ty; $elem_count] as sealed::SimdArray>::NT, -+ >, -+ { -+ use crate::llvm::simd_select; -+ Simd(unsafe { simd_select(self.0, a.0, b.0) }) -+ } -+ } -+ -+ test_select!(bool, $id, $id, (false, true) | $test_tt); -+ }; -+} -+ -+macro_rules! test_select { -+ ( -+ $elem_ty:ident, -+ $mask_ty:ident, -+ $vec_ty:ident,($small:expr, $large:expr) | -+ $test_tt:tt -+ ) => { -+ test_if! { -+ $test_tt: -+ paste::item! { -+ pub mod [<$vec_ty _select>] { -+ use super::*; -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn select() { -+ let o = $small as $elem_ty; -+ let t = $large as $elem_ty; -+ -+ let a = $vec_ty::splat(o); -+ let b = $vec_ty::splat(t); -+ let m = a.lt(b); -+ assert_eq!(m.select(a, b), a); -+ -+ let m = b.lt(a); -+ assert_eq!(m.select(b, a), a); -+ -+ let mut c = a; -+ let mut d = b; -+ let mut m_e = $mask_ty::splat(false); -+ for i in 0..$vec_ty::lanes() { -+ if i % 2 == 0 { -+ let c_tmp = c.extract(i); -+ c = c.replace(i, d.extract(i)); -+ d = d.replace(i, c_tmp); -+ } else { -+ m_e = m_e.replace(i, true); -+ } -+ } -+ -+ let m = c.lt(d); -+ assert_eq!(m_e, m); -+ assert_eq!(m.select(c, d), a); -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/shuffle.rs b/third_party/rust/packed_simd/src/api/shuffle.rs -new file mode 100644 -index 000000000000..13a7fae5fcee ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/shuffle.rs -@@ -0,0 +1,190 @@ -+//! Implements portable vector shuffles with immediate indices. 
-+ -+// FIXME: comprehensive tests -+// https://github.com/rust-lang-nursery/packed_simd/issues/20 -+ -+/// Shuffles vector elements. -+/// -+/// This macro returns a new vector that contains a shuffle of the elements in -+/// one (`shuffle!(vec, [indices...])`) or two (`shuffle!(vec0, vec1, -+/// [indices...])`) input vectors. -+/// -+/// The type of `vec0` and `vec1` must be equal, and the element type of the -+/// resulting vector is the element type of the input vector. -+/// -+/// The number of `indices` must be a power-of-two in range `[0, 64)`, since -+/// currently, the largest vector supported by the library has 64 lanes. The -+/// length of the resulting vector equals the number of indices provided. -+/// -+/// The indices must be in range `[0, M * N)` where `M` is the number of input -+/// vectors (`1` or `2`) and `N` is the number of lanes of the input vectors. -+/// The indices `i` in range `[0, N)` refer to the `i`-th element of `vec0`, -+/// while the indices in range `[N, 2*N)` refer to the `i - N`-th element of -+/// `vec1`. -+/// -+/// # Examples -+/// -+/// Shuffling elements of two vectors: -+/// -+/// ``` -+/// # #[macro_use] -+/// # extern crate packed_simd; -+/// # use packed_simd::*; -+/// # fn main() { -+/// // Shuffle allows reordering the elements: -+/// let x = i32x4::new(1, 2, 3, 4); -+/// let y = i32x4::new(5, 6, 7, 8); -+/// let r = shuffle!(x, y, [4, 0, 5, 1]); -+/// assert_eq!(r, i32x4::new(5, 1, 6, 2)); -+/// -+/// // The resulting vector can als be smaller than the input: -+/// let r = shuffle!(x, y, [1, 6]); -+/// assert_eq!(r, i32x2::new(2, 7)); -+/// -+/// // Or larger: -+/// let r = shuffle!(x, y, [1, 3, 4, 2, 1, 7, 2, 2]); -+/// assert_eq!(r, i32x8::new(2, 4, 5, 3, 2, 8, 3, 3)); -+/// // At most 2 * the number of lanes in the input vector. 
-+/// # } -+/// ``` -+/// -+/// Shuffling elements of one vector: -+/// -+/// ``` -+/// # #[macro_use] -+/// # extern crate packed_simd; -+/// # use packed_simd::*; -+/// # fn main() { -+/// // Shuffle allows reordering the elements of a vector: -+/// let x = i32x4::new(1, 2, 3, 4); -+/// let r = shuffle!(x, [2, 1, 3, 0]); -+/// assert_eq!(r, i32x4::new(3, 2, 4, 1)); -+/// -+/// // The resulting vector can be smaller than the input: -+/// let r = shuffle!(x, [1, 3]); -+/// assert_eq!(r, i32x2::new(2, 4)); -+/// -+/// // Equal: -+/// let r = shuffle!(x, [1, 3, 2, 0]); -+/// assert_eq!(r, i32x4::new(2, 4, 3, 1)); -+/// -+/// // Or larger: -+/// let r = shuffle!(x, [1, 3, 2, 2, 1, 3, 2, 2]); -+/// assert_eq!(r, i32x8::new(2, 4, 3, 3, 2, 4, 3, 3)); -+/// // At most 2 * the number of lanes in the input vector. -+/// # } -+/// ``` -+#[macro_export] -+macro_rules! shuffle { -+ ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr]) => {{ -+ #[allow(unused_unsafe)] -+ unsafe { -+ $crate::Simd($crate::__shuffle_vector2( -+ $vec0.0, -+ $vec1.0, -+ [$l0, $l1], -+ )) -+ } -+ }}; -+ ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr]) => {{ -+ #[allow(unused_unsafe)] -+ unsafe { -+ $crate::Simd($crate::__shuffle_vector4( -+ $vec0.0, -+ $vec1.0, -+ [$l0, $l1, $l2, $l3], -+ )) -+ } -+ }}; -+ ($vec0:expr, $vec1:expr, -+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr, -+ $l4:expr, $l5:expr, $l6:expr, $l7:expr]) => {{ -+ #[allow(unused_unsafe)] -+ unsafe { -+ $crate::Simd($crate::__shuffle_vector8( -+ $vec0.0, -+ $vec1.0, -+ [$l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7], -+ )) -+ } -+ }}; -+ ($vec0:expr, $vec1:expr, -+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr, -+ $l4:expr, $l5:expr, $l6:expr, $l7:expr, -+ $l8:expr, $l9:expr, $l10:expr, $l11:expr, -+ $l12:expr, $l13:expr, $l14:expr, $l15:expr]) => {{ -+ #[allow(unused_unsafe)] -+ unsafe { -+ $crate::Simd($crate::__shuffle_vector16( -+ $vec0.0, -+ $vec1.0, -+ [ -+ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, -+ $l11, $l12, $l13, 
$l14, $l15, -+ ], -+ )) -+ } -+ }}; -+ ($vec0:expr, $vec1:expr, -+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr, -+ $l4:expr, $l5:expr, $l6:expr, $l7:expr, -+ $l8:expr, $l9:expr, $l10:expr, $l11:expr, -+ $l12:expr, $l13:expr, $l14:expr, $l15:expr, -+ $l16:expr, $l17:expr, $l18:expr, $l19:expr, -+ $l20:expr, $l21:expr, $l22:expr, $l23:expr, -+ $l24:expr, $l25:expr, $l26:expr, $l27:expr, -+ $l28:expr, $l29:expr, $l30:expr, $l31:expr]) => {{ -+ #[allow(unused_unsafe)] -+ unsafe { -+ $crate::Simd($crate::__shuffle_vector32( -+ $vec0.0, -+ $vec1.0, -+ [ -+ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, -+ $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19, -+ $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28, -+ $l29, $l30, $l31, -+ ], -+ )) -+ } -+ }}; -+ ($vec0:expr, $vec1:expr, -+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr, -+ $l4:expr, $l5:expr, $l6:expr, $l7:expr, -+ $l8:expr, $l9:expr, $l10:expr, $l11:expr, -+ $l12:expr, $l13:expr, $l14:expr, $l15:expr, -+ $l16:expr, $l17:expr, $l18:expr, $l19:expr, -+ $l20:expr, $l21:expr, $l22:expr, $l23:expr, -+ $l24:expr, $l25:expr, $l26:expr, $l27:expr, -+ $l28:expr, $l29:expr, $l30:expr, $l31:expr, -+ $l32:expr, $l33:expr, $l34:expr, $l35:expr, -+ $l36:expr, $l37:expr, $l38:expr, $l39:expr, -+ $l40:expr, $l41:expr, $l42:expr, $l43:expr, -+ $l44:expr, $l45:expr, $l46:expr, $l47:expr, -+ $l48:expr, $l49:expr, $l50:expr, $l51:expr, -+ $l52:expr, $l53:expr, $l54:expr, $l55:expr, -+ $l56:expr, $l57:expr, $l58:expr, $l59:expr, -+ $l60:expr, $l61:expr, $l62:expr, $l63:expr]) => {{ -+ #[allow(unused_unsafe)] -+ unsafe { -+ $crate::Simd($crate::__shuffle_vector64( -+ $vec0.0, -+ $vec1.0, -+ [ -+ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, -+ $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19, -+ $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28, -+ $l29, $l30, $l31, $l32, $l33, $l34, $l35, $l36, $l37, -+ $l38, $l39, $l40, $l41, $l42, $l43, $l44, $l45, $l46, -+ $l47, $l48, $l49, $l50, $l51, $l52, $l53, $l54, 
$l55, -+ $l56, $l57, $l58, $l59, $l60, $l61, $l62, $l63, -+ ], -+ )) -+ } -+ }}; -+ ($vec:expr, [$($l:expr),*]) => { -+ match $vec { -+ v => shuffle!(v, v, [$($l),*]) -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs -new file mode 100644 -index 000000000000..64536be6cba1 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs -@@ -0,0 +1,159 @@ -+//! Shuffle vector elements according to a dynamic vector of indices. -+ -+macro_rules! impl_shuffle1_dyn { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Shuffle vector elements according to `indices`. -+ #[inline] -+ pub fn shuffle1_dyn(self, indices: I) -> Self -+ where -+ Self: codegen::shuffle1_dyn::Shuffle1Dyn, -+ { -+ codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices) -+ } -+ } -+ }; -+} -+ -+macro_rules! test_shuffle1_dyn { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ test_if! { -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _shuffle1_dyn>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn shuffle1_dyn() { -+ let increasing = { -+ let mut v = $id::splat(0 as $elem_ty); -+ for i in 0..$id::lanes() { -+ v = v.replace(i, i as $elem_ty); -+ } -+ v -+ }; -+ let decreasing = { -+ let mut v = $id::splat(0 as $elem_ty); -+ for i in 0..$id::lanes() { -+ v = v.replace( -+ i, -+ ($id::lanes() - 1 - i) as $elem_ty -+ ); -+ } -+ v -+ }; -+ -+ type Indices = < -+ $id as codegen::shuffle1_dyn::Shuffle1Dyn -+ >::Indices; -+ let increasing_ids: Indices = increasing.cast(); -+ let decreasing_ids: Indices = decreasing.cast(); -+ -+ assert_eq!( -+ increasing.shuffle1_dyn(increasing_ids), -+ increasing, -+ "(i,i)=>i" -+ ); -+ assert_eq!( -+ decreasing.shuffle1_dyn(increasing_ids), -+ decreasing, -+ "(d,i)=>d" -+ ); -+ assert_eq!( -+ increasing.shuffle1_dyn(decreasing_ids), -+ decreasing, -+ "(i,d)=>d" -+ ); -+ assert_eq!( -+ decreasing.shuffle1_dyn(decreasing_ids), -+ increasing, -+ "(d,d)=>i" -+ ); -+ -+ for i in 0..$id::lanes() { -+ let v_ids: Indices -+ = $id::splat(i as $elem_ty).cast(); -+ assert_eq!(increasing.shuffle1_dyn(v_ids), -+ $id::splat(increasing.extract(i)) -+ ); -+ assert_eq!(decreasing.shuffle1_dyn(v_ids), -+ $id::splat(decreasing.extract(i)) -+ ); -+ assert_eq!( -+ $id::splat(i as $elem_ty) -+ .shuffle1_dyn(increasing_ids), -+ $id::splat(i as $elem_ty) -+ ); -+ assert_eq!( -+ $id::splat(i as $elem_ty) -+ .shuffle1_dyn(decreasing_ids), -+ $id::splat(i as $elem_ty) -+ ); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -+ -+macro_rules! test_shuffle1_dyn_mask { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ test_if! { -+ $test_tt: -+ paste::item! { -+ pub mod [<$id _shuffle1_dyn>] { -+ use super::*; -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn shuffle1_dyn() { -+ // alternating = [true, false, true, false, ...] 
-+ let mut alternating = $id::splat(false); -+ for i in 0..$id::lanes() { -+ if i % 2 == 0 { -+ alternating = alternating.replace(i, true); -+ } -+ } -+ -+ type Indices = < -+ $id as codegen::shuffle1_dyn::Shuffle1Dyn -+ >::Indices; -+ // even = [0, 0, 2, 2, 4, 4, ..] -+ let even = { -+ let mut v = Indices::splat(0); -+ for i in 0..$id::lanes() { -+ if i % 2 == 0 { -+ v = v.replace(i, (i as u8).into()); -+ } else { -+ v = v.replace(i, (i as u8 - 1).into()); -+ } -+ } -+ v -+ }; -+ // odd = [1, 1, 3, 3, 5, 5, ...] -+ let odd = { -+ let mut v = Indices::splat(0); -+ for i in 0..$id::lanes() { -+ if i % 2 != 0 { -+ v = v.replace(i, (i as u8).into()); -+ } else { -+ v = v.replace(i, (i as u8 + 1).into()); -+ } -+ } -+ v -+ }; -+ -+ assert_eq!( -+ alternating.shuffle1_dyn(even), -+ $id::splat(true) -+ ); -+ if $id::lanes() > 1 { -+ assert_eq!( -+ alternating.shuffle1_dyn(odd), -+ $id::splat(false) -+ ); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/slice.rs b/third_party/rust/packed_simd/src/api/slice.rs -new file mode 100644 -index 000000000000..526b848b5c06 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/slice.rs -@@ -0,0 +1,7 @@ -+//! Slice from/to methods -+ -+#[macro_use] -+mod from_slice; -+ -+#[macro_use] -+mod write_to_slice; -diff --git a/third_party/rust/packed_simd/src/api/slice/from_slice.rs b/third_party/rust/packed_simd/src/api/slice/from_slice.rs -new file mode 100644 -index 000000000000..109cd1f10b01 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/slice/from_slice.rs -@@ -0,0 +1,216 @@ -+//! Implements methods to read a vector type from a slice. -+ -+macro_rules! impl_slice_from_slice { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Instantiates a new vector with the values of the `slice`. -+ /// -+ /// # Panics -+ /// -+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned -+ /// to an `align_of::()` boundary. 
-+ #[inline] -+ pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self { -+ unsafe { -+ assert!(slice.len() >= $elem_count); -+ let target_ptr = slice.get_unchecked(0) as *const $elem_ty; -+ assert_eq!( -+ target_ptr -+ .align_offset(crate::mem::align_of::()), -+ 0 -+ ); -+ Self::from_slice_aligned_unchecked(slice) -+ } -+ } -+ -+ /// Instantiates a new vector with the values of the `slice`. -+ /// -+ /// # Panics -+ /// -+ /// If `slice.len() < Self::lanes()`. -+ #[inline] -+ pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self { -+ unsafe { -+ assert!(slice.len() >= $elem_count); -+ Self::from_slice_unaligned_unchecked(slice) -+ } -+ } -+ -+ /// Instantiates a new vector with the values of the `slice`. -+ /// -+ /// # Precondition -+ /// -+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned -+ /// to an `align_of::()` boundary, the behavior is undefined. -+ #[inline] -+ pub unsafe fn from_slice_aligned_unchecked( -+ slice: &[$elem_ty], -+ ) -> Self { -+ debug_assert!(slice.len() >= $elem_count); -+ let target_ptr = slice.get_unchecked(0) as *const $elem_ty; -+ debug_assert_eq!( -+ target_ptr.align_offset(crate::mem::align_of::()), -+ 0 -+ ); -+ -+ #[allow(clippy::cast_ptr_alignment)] -+ *(target_ptr as *const Self) -+ } -+ -+ /// Instantiates a new vector with the values of the `slice`. -+ /// -+ /// # Precondition -+ /// -+ /// If `slice.len() < Self::lanes()` the behavior is undefined. -+ #[inline] -+ pub unsafe fn from_slice_unaligned_unchecked( -+ slice: &[$elem_ty], -+ ) -> Self { -+ use crate::mem::size_of; -+ debug_assert!(slice.len() >= $elem_count); -+ let target_ptr = -+ slice.get_unchecked(0) as *const $elem_ty as *const u8; -+ let mut x = Self::splat(0 as $elem_ty); -+ let self_ptr = &mut x as *mut Self as *mut u8; -+ crate::ptr::copy_nonoverlapping( -+ target_ptr, -+ self_ptr, -+ size_of::(), -+ ); -+ x -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _slice_from_slice>] { -+ use super::*; -+ use crate::iter::Iterator; -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn from_slice_unaligned() { -+ let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; -+ unaligned[0] = 0 as $elem_ty; -+ let vec = $id::from_slice_unaligned(&unaligned[1..]); -+ for (index, &b) in unaligned.iter().enumerate() { -+ if index == 0 { -+ assert_eq!(b, 0 as $elem_ty); -+ } else { -+ assert_eq!(b, 42 as $elem_ty); -+ assert_eq!(b, vec.extract(index - 1)); -+ } -+ } -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn from_slice_unaligned_fail() { -+ let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; -+ unaligned[0] = 0 as $elem_ty; -+ // the slice is not large enough => panic -+ let _vec = $id::from_slice_unaligned(&unaligned[2..]); -+ } -+ -+ union A { -+ data: [$elem_ty; 2 * $id::lanes()], -+ _vec: $id, -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn from_slice_aligned() { -+ let mut aligned = A { -+ data: [0 as $elem_ty; 2 * $id::lanes()], -+ }; -+ for i in $id::lanes()..(2 * $id::lanes()) { -+ unsafe { -+ aligned.data[i] = 42 as $elem_ty; -+ } -+ } -+ -+ let vec = unsafe { -+ $id::from_slice_aligned( -+ &aligned.data[$id::lanes()..] 
-+ ) -+ }; -+ for (index, &b) in -+ unsafe { aligned.data.iter().enumerate() } { -+ if index < $id::lanes() { -+ assert_eq!(b, 0 as $elem_ty); -+ } else { -+ assert_eq!(b, 42 as $elem_ty); -+ assert_eq!( -+ b, vec.extract(index - $id::lanes()) -+ ); -+ } -+ } -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn from_slice_aligned_fail_lanes() { -+ let aligned = A { -+ data: [0 as $elem_ty; 2 * $id::lanes()], -+ }; -+ let _vec = unsafe { -+ $id::from_slice_aligned( -+ &aligned.data[2 * $id::lanes()..] -+ ) -+ }; -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn from_slice_aligned_fail_align() { -+ unsafe { -+ let aligned = A { -+ data: [0 as $elem_ty; 2 * $id::lanes()], -+ }; -+ -+ // get a pointer to the front of data -+ let ptr: *const $elem_ty = aligned.data.as_ptr() -+ as *const $elem_ty; -+ // offset pointer by one element -+ let ptr = ptr.wrapping_add(1); -+ -+ if ptr.align_offset( -+ crate::mem::align_of::<$id>() -+ ) == 0 { -+ // the pointer is properly aligned, so -+ // from_slice_aligned won't fail here (e.g. this -+ // can happen for i128x1). 
So we panic to make -+ // the "should_fail" test pass: -+ panic!("ok"); -+ } -+ -+ // create a slice - this is safe, because the -+ // elements of the slice exist, are properly -+ // initialized, and properly aligned: -+ let s: &[$elem_ty] = slice::from_raw_parts( -+ ptr, $id::lanes() -+ ); -+ // this should always panic because the slice -+ // alignment does not match the alignment -+ // requirements for the vector type: -+ let _vec = $id::from_slice_aligned(s); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs -new file mode 100644 -index 000000000000..fcb288da70fc ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs -@@ -0,0 +1,211 @@ -+//! Implements methods to write a vector type to a slice. -+ -+macro_rules! impl_slice_write_to_slice { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Writes the values of the vector to the `slice`. -+ /// -+ /// # Panics -+ /// -+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not -+ /// aligned to an `align_of::()` boundary. -+ #[inline] -+ pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) { -+ unsafe { -+ assert!(slice.len() >= $elem_count); -+ let target_ptr = -+ slice.get_unchecked_mut(0) as *mut $elem_ty; -+ assert_eq!( -+ target_ptr -+ .align_offset(crate::mem::align_of::()), -+ 0 -+ ); -+ self.write_to_slice_aligned_unchecked(slice); -+ } -+ } -+ -+ /// Writes the values of the vector to the `slice`. -+ /// -+ /// # Panics -+ /// -+ /// If `slice.len() < Self::lanes()`. -+ #[inline] -+ pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) { -+ unsafe { -+ assert!(slice.len() >= $elem_count); -+ self.write_to_slice_unaligned_unchecked(slice); -+ } -+ } -+ -+ /// Writes the values of the vector to the `slice`. 
-+ /// -+ /// # Precondition -+ /// -+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not -+ /// aligned to an `align_of::()` boundary, the behavior is -+ /// undefined. -+ #[inline] -+ pub unsafe fn write_to_slice_aligned_unchecked( -+ self, slice: &mut [$elem_ty], -+ ) { -+ debug_assert!(slice.len() >= $elem_count); -+ let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty; -+ debug_assert_eq!( -+ target_ptr.align_offset(crate::mem::align_of::()), -+ 0 -+ ); -+ -+ #[allow(clippy::cast_ptr_alignment)] -+ #[allow(clippy::cast_ptr_alignment)] -+ #[allow(clippy::cast_ptr_alignment)] -+ #[allow(clippy::cast_ptr_alignment)] -+ *(target_ptr as *mut Self) = self; -+ } -+ -+ /// Writes the values of the vector to the `slice`. -+ /// -+ /// # Precondition -+ /// -+ /// If `slice.len() < Self::lanes()` the behavior is undefined. -+ #[inline] -+ pub unsafe fn write_to_slice_unaligned_unchecked( -+ self, slice: &mut [$elem_ty], -+ ) { -+ debug_assert!(slice.len() >= $elem_count); -+ let target_ptr = -+ slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; -+ let self_ptr = &self as *const Self as *const u8; -+ crate::ptr::copy_nonoverlapping( -+ self_ptr, -+ target_ptr, -+ crate::mem::size_of::(), -+ ); -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item! 
{ -+ pub mod [<$id _slice_write_to_slice>] { -+ use super::*; -+ use crate::iter::Iterator; -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn write_to_slice_unaligned() { -+ let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; -+ let vec = $id::splat(42 as $elem_ty); -+ vec.write_to_slice_unaligned(&mut unaligned[1..]); -+ for (index, &b) in unaligned.iter().enumerate() { -+ if index == 0 { -+ assert_eq!(b, 0 as $elem_ty); -+ } else { -+ assert_eq!(b, 42 as $elem_ty); -+ assert_eq!(b, vec.extract(index - 1)); -+ } -+ } -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn write_to_slice_unaligned_fail() { -+ let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; -+ let vec = $id::splat(42 as $elem_ty); -+ vec.write_to_slice_unaligned(&mut unaligned[2..]); -+ } -+ -+ union A { -+ data: [$elem_ty; 2 * $id::lanes()], -+ _vec: $id, -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] -+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn write_to_slice_aligned() { -+ let mut aligned = A { -+ data: [0 as $elem_ty; 2 * $id::lanes()], -+ }; -+ let vec = $id::splat(42 as $elem_ty); -+ unsafe { -+ vec.write_to_slice_aligned( -+ &mut aligned.data[$id::lanes()..] 
-+ ); -+ for (idx, &b) in aligned.data.iter().enumerate() { -+ if idx < $id::lanes() { -+ assert_eq!(b, 0 as $elem_ty); -+ } else { -+ assert_eq!(b, 42 as $elem_ty); -+ assert_eq!( -+ b, vec.extract(idx - $id::lanes()) -+ ); -+ } -+ } -+ } -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn write_to_slice_aligned_fail_lanes() { -+ let mut aligned = A { -+ data: [0 as $elem_ty; 2 * $id::lanes()], -+ }; -+ let vec = $id::splat(42 as $elem_ty); -+ unsafe { -+ vec.write_to_slice_aligned( -+ &mut aligned.data[2 * $id::lanes()..] -+ ) -+ }; -+ } -+ -+ // FIXME: wasm-bindgen-test does not support #[should_panic] -+ // #[cfg_attr(not(target_arch = "wasm32"), test)] -+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ #[cfg(not(target_arch = "wasm32"))] -+ #[test] -+ #[should_panic] -+ fn write_to_slice_aligned_fail_align() { -+ unsafe { -+ let mut aligned = A { -+ data: [0 as $elem_ty; 2 * $id::lanes()], -+ }; -+ -+ // get a pointer to the front of data -+ let ptr: *mut $elem_ty -+ = aligned.data.as_mut_ptr() as *mut $elem_ty; -+ // offset pointer by one element -+ let ptr = ptr.wrapping_add(1); -+ -+ if ptr.align_offset(crate::mem::align_of::<$id>()) -+ == 0 { -+ // the pointer is properly aligned, so -+ // write_to_slice_aligned won't fail here (e.g. -+ // this can happen for i128x1). 
So we panic to -+ // make the "should_fail" test pass: -+ panic!("ok"); -+ } -+ -+ // create a slice - this is safe, because the -+ // elements of the slice exist, are properly -+ // initialized, and properly aligned: -+ let s: &mut [$elem_ty] -+ = slice::from_raw_parts_mut(ptr, $id::lanes()); -+ // this should always panic because the slice -+ // alignment does not match the alignment -+ // requirements for the vector type: -+ let vec = $id::splat(42 as $elem_ty); -+ vec.write_to_slice_aligned(s); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/api/swap_bytes.rs b/third_party/rust/packed_simd/src/api/swap_bytes.rs -new file mode 100644 -index 000000000000..53bba25bd311 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/api/swap_bytes.rs -@@ -0,0 +1,192 @@ -+//! Horizontal swap bytes -+ -+macro_rules! impl_swap_bytes { -+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { -+ impl $id { -+ /// Reverses the byte order of the vector. -+ #[inline] -+ pub fn swap_bytes(self) -> Self { -+ super::codegen::swap_bytes::SwapBytes::swap_bytes(self) -+ } -+ -+ /// Converts self to little endian from the target's endianness. -+ /// -+ /// On little endian this is a no-op. On big endian the bytes are -+ /// swapped. -+ #[inline] -+ pub fn to_le(self) -> Self { -+ #[cfg(target_endian = "little")] -+ { -+ self -+ } -+ #[cfg(not(target_endian = "little"))] -+ { -+ self.swap_bytes() -+ } -+ } -+ -+ /// Converts self to big endian from the target's endianness. -+ /// -+ /// On big endian this is a no-op. On little endian the bytes are -+ /// swapped. -+ #[inline] -+ pub fn to_be(self) -> Self { -+ #[cfg(target_endian = "big")] -+ { -+ self -+ } -+ #[cfg(not(target_endian = "big"))] -+ { -+ self.swap_bytes() -+ } -+ } -+ -+ /// Converts a vector from little endian to the target's endianness. -+ /// -+ /// On little endian this is a no-op. On big endian the bytes are -+ /// swapped. 
-+ #[inline] -+ pub fn from_le(x: Self) -> Self { -+ #[cfg(target_endian = "little")] -+ { -+ x -+ } -+ #[cfg(not(target_endian = "little"))] -+ { -+ x.swap_bytes() -+ } -+ } -+ -+ /// Converts a vector from big endian to the target's endianness. -+ /// -+ /// On big endian this is a no-op. On little endian the bytes are -+ /// swapped. -+ #[inline] -+ pub fn from_be(x: Self) -> Self { -+ #[cfg(target_endian = "big")] -+ { -+ x -+ } -+ #[cfg(not(target_endian = "big"))] -+ { -+ x.swap_bytes() -+ } -+ } -+ } -+ -+ test_if! { -+ $test_tt: -+ paste::item_with_macros! { -+ pub mod [<$id _swap_bytes>] { -+ use super::*; -+ -+ const BYTES: [u8; 64] = [ -+ 0, 1, 2, 3, 4, 5, 6, 7, -+ 8, 9, 10, 11, 12, 13, 14, 15, -+ 16, 17, 18, 19, 20, 21, 22, 23, -+ 24, 25, 26, 27, 28, 29, 30, 31, -+ 32, 33, 34, 35, 36, 37, 38, 39, -+ 40, 41, 42, 43, 44, 45, 46, 47, -+ 48, 49, 50, 51, 52, 53, 54, 55, -+ 56, 57, 58, 59, 60, 61, 62, 63, -+ ]; -+ -+ macro_rules! swap { -+ ($func: ident) => {{ -+ // catch possible future >512 vectors -+ assert!(mem::size_of::<$id>() <= 64); -+ -+ let mut actual = BYTES; -+ let elems: &mut [$elem_ty] = unsafe { -+ slice::from_raw_parts_mut( -+ actual.as_mut_ptr() as *mut $elem_ty, -+ $id::lanes(), -+ ) -+ }; -+ -+ let vec = $id::from_slice_unaligned(elems); -+ $id::$func(vec).write_to_slice_unaligned(elems); -+ -+ actual -+ }}; -+ } -+ -+ macro_rules! test_swap { -+ ($func: ident) => {{ -+ let actual = swap!($func); -+ let expected = -+ BYTES.iter().rev() -+ .skip(64 - crate::mem::size_of::<$id>()); -+ assert!(actual.iter().zip(expected) -+ .all(|(x, y)| x == y)); -+ }}; -+ } -+ -+ macro_rules! 
test_no_swap { -+ ($func: ident) => {{ -+ let actual = swap!($func); -+ let expected = BYTES.iter() -+ .take(mem::size_of::<$id>()); -+ -+ assert!(actual.iter().zip(expected) -+ .all(|(x, y)| x == y)); -+ }}; -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn swap_bytes() { -+ test_swap!(swap_bytes); -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn to_le() { -+ #[cfg(target_endian = "little")] -+ { -+ test_no_swap!(to_le); -+ } -+ #[cfg(not(target_endian = "little"))] -+ { -+ test_swap!(to_le); -+ } -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn to_be() { -+ #[cfg(target_endian = "big")] -+ { -+ test_no_swap!(to_be); -+ } -+ #[cfg(not(target_endian = "big"))] -+ { -+ test_swap!(to_be); -+ } -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn from_le() { -+ #[cfg(target_endian = "little")] -+ { -+ test_no_swap!(from_le); -+ } -+ #[cfg(not(target_endian = "little"))] -+ { -+ test_swap!(from_le); -+ } -+ } -+ -+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+ fn from_be() { -+ #[cfg(target_endian = "big")] -+ { -+ test_no_swap!(from_be); -+ } -+ #[cfg(not(target_endian = "big"))] -+ { -+ test_swap!(from_be); -+ } -+ } -+ } -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/codegen.rs b/third_party/rust/packed_simd/src/codegen.rs -new file mode 100644 -index 000000000000..b7ccd838603f ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen.rs -@@ -0,0 +1,59 @@ -+//! Code-generation utilities -+ -+crate mod bit_manip; -+crate mod llvm; -+crate mod math; -+crate mod reductions; -+crate mod shuffle; -+crate mod shuffle1_dyn; -+crate mod swap_bytes; -+ -+macro_rules! 
impl_simd_array { -+ ([$elem_ty:ident; $elem_count:expr]: -+ $tuple_id:ident | $($elem_tys:ident),*) => { -+ #[derive(Copy, Clone)] -+ #[repr(simd)] -+ pub struct $tuple_id($(crate $elem_tys),*); -+ //^^^^^^^ leaked through SimdArray -+ -+ impl crate::sealed::SimdArray for [$elem_ty; $elem_count] { -+ type Tuple = $tuple_id; -+ type T = $elem_ty; -+ const N: usize = $elem_count; -+ type NT = [u32; $elem_count]; -+ } -+ -+ impl crate::sealed::Simd for $tuple_id { -+ type Element = $elem_ty; -+ const LANES: usize = $elem_count; -+ type LanesType = [u32; $elem_count]; -+ } -+ -+ } -+} -+ -+crate mod pointer_sized_int; -+ -+crate mod v16; -+crate use self::v16::*; -+ -+crate mod v32; -+crate use self::v32::*; -+ -+crate mod v64; -+crate use self::v64::*; -+ -+crate mod v128; -+crate use self::v128::*; -+ -+crate mod v256; -+crate use self::v256::*; -+ -+crate mod v512; -+crate use self::v512::*; -+ -+crate mod vSize; -+crate use self::vSize::*; -+ -+crate mod vPtr; -+crate use self::vPtr::*; -diff --git a/third_party/rust/packed_simd/src/codegen/bit_manip.rs b/third_party/rust/packed_simd/src/codegen/bit_manip.rs -new file mode 100644 -index 000000000000..947266f5bce8 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/bit_manip.rs -@@ -0,0 +1,354 @@ -+//! LLVM bit manipulation intrinsics. 
-+#![rustfmt::skip] -+ -+use crate::*; -+ -+#[allow(improper_ctypes, dead_code)] -+extern "C" { -+ #[link_name = "llvm.ctlz.v2i8"] -+ fn ctlz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2; -+ #[link_name = "llvm.ctlz.v4i8"] -+ fn ctlz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4; -+ #[link_name = "llvm.ctlz.v8i8"] -+ fn ctlz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8; -+ #[link_name = "llvm.ctlz.v16i8"] -+ fn ctlz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16; -+ #[link_name = "llvm.ctlz.v32i8"] -+ fn ctlz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32; -+ #[link_name = "llvm.ctlz.v64i8"] -+ fn ctlz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64; -+ -+ #[link_name = "llvm.ctlz.v2i16"] -+ fn ctlz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2; -+ #[link_name = "llvm.ctlz.v4i16"] -+ fn ctlz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4; -+ #[link_name = "llvm.ctlz.v8i16"] -+ fn ctlz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8; -+ #[link_name = "llvm.ctlz.v16i16"] -+ fn ctlz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16; -+ #[link_name = "llvm.ctlz.v32i16"] -+ fn ctlz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32; -+ -+ #[link_name = "llvm.ctlz.v2i32"] -+ fn ctlz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2; -+ #[link_name = "llvm.ctlz.v4i32"] -+ fn ctlz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4; -+ #[link_name = "llvm.ctlz.v8i32"] -+ fn ctlz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8; -+ #[link_name = "llvm.ctlz.v16i32"] -+ fn ctlz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16; -+ -+ #[link_name = "llvm.ctlz.v2i64"] -+ fn ctlz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2; -+ #[link_name = "llvm.ctlz.v4i64"] -+ fn ctlz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4; -+ #[link_name = "llvm.ctlz.v8i64"] -+ fn ctlz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8; -+ -+ #[link_name = "llvm.ctlz.v1i128"] -+ fn ctlz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1; -+ #[link_name = "llvm.ctlz.v2i128"] -+ fn ctlz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2; -+ 
#[link_name = "llvm.ctlz.v4i128"] -+ fn ctlz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4; -+ -+ #[link_name = "llvm.cttz.v2i8"] -+ fn cttz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2; -+ #[link_name = "llvm.cttz.v4i8"] -+ fn cttz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4; -+ #[link_name = "llvm.cttz.v8i8"] -+ fn cttz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8; -+ #[link_name = "llvm.cttz.v16i8"] -+ fn cttz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16; -+ #[link_name = "llvm.cttz.v32i8"] -+ fn cttz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32; -+ #[link_name = "llvm.cttz.v64i8"] -+ fn cttz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64; -+ -+ #[link_name = "llvm.cttz.v2i16"] -+ fn cttz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2; -+ #[link_name = "llvm.cttz.v4i16"] -+ fn cttz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4; -+ #[link_name = "llvm.cttz.v8i16"] -+ fn cttz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8; -+ #[link_name = "llvm.cttz.v16i16"] -+ fn cttz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16; -+ #[link_name = "llvm.cttz.v32i16"] -+ fn cttz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32; -+ -+ #[link_name = "llvm.cttz.v2i32"] -+ fn cttz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2; -+ #[link_name = "llvm.cttz.v4i32"] -+ fn cttz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4; -+ #[link_name = "llvm.cttz.v8i32"] -+ fn cttz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8; -+ #[link_name = "llvm.cttz.v16i32"] -+ fn cttz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16; -+ -+ #[link_name = "llvm.cttz.v2i64"] -+ fn cttz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2; -+ #[link_name = "llvm.cttz.v4i64"] -+ fn cttz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4; -+ #[link_name = "llvm.cttz.v8i64"] -+ fn cttz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8; -+ -+ #[link_name = "llvm.cttz.v1i128"] -+ fn cttz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1; -+ #[link_name = "llvm.cttz.v2i128"] -+ fn cttz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2; 
-+ #[link_name = "llvm.cttz.v4i128"] -+ fn cttz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4; -+ -+ #[link_name = "llvm.ctpop.v2i8"] -+ fn ctpop_u8x2(x: u8x2) -> u8x2; -+ #[link_name = "llvm.ctpop.v4i8"] -+ fn ctpop_u8x4(x: u8x4) -> u8x4; -+ #[link_name = "llvm.ctpop.v8i8"] -+ fn ctpop_u8x8(x: u8x8) -> u8x8; -+ #[link_name = "llvm.ctpop.v16i8"] -+ fn ctpop_u8x16(x: u8x16) -> u8x16; -+ #[link_name = "llvm.ctpop.v32i8"] -+ fn ctpop_u8x32(x: u8x32) -> u8x32; -+ #[link_name = "llvm.ctpop.v64i8"] -+ fn ctpop_u8x64(x: u8x64) -> u8x64; -+ -+ #[link_name = "llvm.ctpop.v2i16"] -+ fn ctpop_u16x2(x: u16x2) -> u16x2; -+ #[link_name = "llvm.ctpop.v4i16"] -+ fn ctpop_u16x4(x: u16x4) -> u16x4; -+ #[link_name = "llvm.ctpop.v8i16"] -+ fn ctpop_u16x8(x: u16x8) -> u16x8; -+ #[link_name = "llvm.ctpop.v16i16"] -+ fn ctpop_u16x16(x: u16x16) -> u16x16; -+ #[link_name = "llvm.ctpop.v32i16"] -+ fn ctpop_u16x32(x: u16x32) -> u16x32; -+ -+ #[link_name = "llvm.ctpop.v2i32"] -+ fn ctpop_u32x2(x: u32x2) -> u32x2; -+ #[link_name = "llvm.ctpop.v4i32"] -+ fn ctpop_u32x4(x: u32x4) -> u32x4; -+ #[link_name = "llvm.ctpop.v8i32"] -+ fn ctpop_u32x8(x: u32x8) -> u32x8; -+ #[link_name = "llvm.ctpop.v16i32"] -+ fn ctpop_u32x16(x: u32x16) -> u32x16; -+ -+ #[link_name = "llvm.ctpop.v2i64"] -+ fn ctpop_u64x2(x: u64x2) -> u64x2; -+ #[link_name = "llvm.ctpop.v4i64"] -+ fn ctpop_u64x4(x: u64x4) -> u64x4; -+ #[link_name = "llvm.ctpop.v8i64"] -+ fn ctpop_u64x8(x: u64x8) -> u64x8; -+ -+ #[link_name = "llvm.ctpop.v1i128"] -+ fn ctpop_u128x1(x: u128x1) -> u128x1; -+ #[link_name = "llvm.ctpop.v2i128"] -+ fn ctpop_u128x2(x: u128x2) -> u128x2; -+ #[link_name = "llvm.ctpop.v4i128"] -+ fn ctpop_u128x4(x: u128x4) -> u128x4; -+} -+ -+crate trait BitManip { -+ fn ctpop(self) -> Self; -+ fn ctlz(self) -> Self; -+ fn cttz(self) -> Self; -+} -+ -+macro_rules! 
impl_bit_manip { -+ (inner: $ty:ident, $scalar:ty, $uty:ident, -+ $ctpop:ident, $ctlz:ident, $cttz:ident) => { -+ // FIXME: several LLVM intrinsics break on s390x https://github.com/rust-lang-nursery/packed_simd/issues/192 -+ #[cfg(target_arch = "s390x")] -+ impl_bit_manip! { scalar: $ty, $scalar } -+ #[cfg(not(target_arch = "s390x"))] -+ impl BitManip for $ty { -+ #[inline] -+ fn ctpop(self) -> Self { -+ let y: $uty = self.cast(); -+ unsafe { $ctpop(y).cast() } -+ } -+ -+ #[inline] -+ fn ctlz(self) -> Self { -+ let y: $uty = self.cast(); -+ // the ctxx intrinsics need compile-time constant -+ // `is_zero_undef` -+ unsafe { $ctlz(y, false).cast() } -+ } -+ -+ #[inline] -+ fn cttz(self) -> Self { -+ let y: $uty = self.cast(); -+ unsafe { $cttz(y, false).cast() } -+ } -+ } -+ }; -+ (sized_inner: $ty:ident, $scalar:ty, $uty:ident) => { -+ #[cfg(target_arch = "s390x")] -+ impl_bit_manip! { scalar: $ty, $scalar } -+ #[cfg(not(target_arch = "s390x"))] -+ impl BitManip for $ty { -+ #[inline] -+ fn ctpop(self) -> Self { -+ let y: $uty = self.cast(); -+ $uty::ctpop(y).cast() -+ } -+ -+ #[inline] -+ fn ctlz(self) -> Self { -+ let y: $uty = self.cast(); -+ $uty::ctlz(y).cast() -+ } -+ -+ #[inline] -+ fn cttz(self) -> Self { -+ let y: $uty = self.cast(); -+ $uty::cttz(y).cast() -+ } -+ } -+ }; -+ (scalar: $ty:ident, $scalar:ty) => { -+ impl BitManip for $ty { -+ #[inline] -+ fn ctpop(self) -> Self { -+ let mut ones = self; -+ for i in 0..Self::lanes() { -+ ones = ones -+ .replace(i, self.extract(i).count_ones() as $scalar); -+ } -+ ones -+ } -+ -+ #[inline] -+ fn ctlz(self) -> Self { -+ let mut lz = self; -+ for i in 0..Self::lanes() { -+ lz = lz.replace( -+ i, -+ self.extract(i).leading_zeros() as $scalar, -+ ); -+ } -+ lz -+ } -+ -+ #[inline] -+ fn cttz(self) -> Self { -+ let mut tz = self; -+ for i in 0..Self::lanes() { -+ tz = tz.replace( -+ i, -+ self.extract(i).trailing_zeros() as $scalar, -+ ); -+ } -+ tz -+ } -+ } -+ }; -+ ($uty:ident, $uscalar:ty, $ity:ident, 
$iscalar:ty, -+ $ctpop:ident, $ctlz:ident, $cttz:ident) => { -+ impl_bit_manip! { inner: $uty, $uscalar, $uty, $ctpop, $ctlz, $cttz } -+ impl_bit_manip! { inner: $ity, $iscalar, $uty, $ctpop, $ctlz, $cttz } -+ }; -+ (sized: $usize:ident, $uscalar:ty, $isize:ident, -+ $iscalar:ty, $ty:ident) => { -+ impl_bit_manip! { sized_inner: $usize, $uscalar, $ty } -+ impl_bit_manip! { sized_inner: $isize, $iscalar, $ty } -+ }; -+} -+ -+impl_bit_manip! { u8x2 , u8, i8x2, i8, ctpop_u8x2, ctlz_u8x2, cttz_u8x2 } -+impl_bit_manip! { u8x4 , u8, i8x4, i8, ctpop_u8x4, ctlz_u8x4, cttz_u8x4 } -+#[cfg(not(target_arch = "aarch64"))] // see below -+impl_bit_manip! { u8x8 , u8, i8x8, i8, ctpop_u8x8, ctlz_u8x8, cttz_u8x8 } -+impl_bit_manip! { u8x16 , u8, i8x16, i8, ctpop_u8x16, ctlz_u8x16, cttz_u8x16 } -+impl_bit_manip! { u8x32 , u8, i8x32, i8, ctpop_u8x32, ctlz_u8x32, cttz_u8x32 } -+impl_bit_manip! { u8x64 , u8, i8x64, i8, ctpop_u8x64, ctlz_u8x64, cttz_u8x64 } -+impl_bit_manip! { u16x2 , u16, i16x2, i16, ctpop_u16x2, ctlz_u16x2, cttz_u16x2 } -+impl_bit_manip! { u16x4 , u16, i16x4, i16, ctpop_u16x4, ctlz_u16x4, cttz_u16x4 } -+impl_bit_manip! { u16x8 , u16, i16x8, i16, ctpop_u16x8, ctlz_u16x8, cttz_u16x8 } -+impl_bit_manip! { u16x16 , u16, i16x16, i16, ctpop_u16x16, ctlz_u16x16, cttz_u16x16 } -+impl_bit_manip! { u16x32 , u16, i16x32, i16, ctpop_u16x32, ctlz_u16x32, cttz_u16x32 } -+impl_bit_manip! { u32x2 , u32, i32x2, i32, ctpop_u32x2, ctlz_u32x2, cttz_u32x2 } -+impl_bit_manip! { u32x4 , u32, i32x4, i32, ctpop_u32x4, ctlz_u32x4, cttz_u32x4 } -+impl_bit_manip! { u32x8 , u32, i32x8, i32, ctpop_u32x8, ctlz_u32x8, cttz_u32x8 } -+impl_bit_manip! { u32x16 , u32, i32x16, i32, ctpop_u32x16, ctlz_u32x16, cttz_u32x16 } -+impl_bit_manip! { u64x2 , u64, i64x2, i64, ctpop_u64x2, ctlz_u64x2, cttz_u64x2 } -+impl_bit_manip! { u64x4 , u64, i64x4, i64, ctpop_u64x4, ctlz_u64x4, cttz_u64x4 } -+impl_bit_manip! { u64x8 , u64, i64x8, i64, ctpop_u64x8, ctlz_u64x8, cttz_u64x8 } -+impl_bit_manip! 
{ u128x1 , u128, i128x1, i128, ctpop_u128x1, ctlz_u128x1, cttz_u128x1 } -+impl_bit_manip! { u128x2 , u128, i128x2, i128, ctpop_u128x2, ctlz_u128x2, cttz_u128x2 } -+impl_bit_manip! { u128x4 , u128, i128x4, i128, ctpop_u128x4, ctlz_u128x4, cttz_u128x4 } -+ -+#[cfg(target_arch = "aarch64")] -+impl BitManip for u8x8 { -+ #[inline] -+ fn ctpop(self) -> Self { -+ let y: u8x8 = self.cast(); -+ unsafe { ctpop_u8x8(y).cast() } -+ } -+ -+ #[inline] -+ fn ctlz(self) -> Self { -+ let y: u8x8 = self.cast(); -+ unsafe { ctlz_u8x8(y, false).cast() } -+ } -+ -+ #[inline] -+ fn cttz(self) -> Self { -+ // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191 -+ // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 -+ // intrinsics -+ let mut tz = self; -+ for i in 0..Self::lanes() { -+ tz = tz.replace(i, self.extract(i).trailing_zeros() as u8); -+ } -+ tz -+ } -+} -+#[cfg(target_arch = "aarch64")] -+impl BitManip for i8x8 { -+ #[inline] -+ fn ctpop(self) -> Self { -+ let y: u8x8 = self.cast(); -+ unsafe { ctpop_u8x8(y).cast() } -+ } -+ -+ #[inline] -+ fn ctlz(self) -> Self { -+ let y: u8x8 = self.cast(); -+ unsafe { ctlz_u8x8(y, false).cast() } -+ } -+ -+ #[inline] -+ fn cttz(self) -> Self { -+ // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191 -+ // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 -+ // intrinsics -+ let mut tz = self; -+ for i in 0..Self::lanes() { -+ tz = tz.replace(i, self.extract(i).trailing_zeros() as i8); -+ } -+ tz -+ } -+} -+ -+cfg_if! { -+ if #[cfg(target_pointer_width = "8")] { -+ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u8x2 } -+ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u8x4 } -+ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u8x8 } -+ } else if #[cfg(target_pointer_width = "16")] { -+ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u16x2 } -+ impl_bit_manip! 
{ sized: usizex4, usize, isizex4, isize, u16x4 } -+ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u16x8 } -+ } else if #[cfg(target_pointer_width = "32")] { -+ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u32x2 } -+ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u32x4 } -+ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u32x8 } -+ } else if #[cfg(target_pointer_width = "64")] { -+ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u64x2 } -+ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u64x4 } -+ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u64x8 } -+ } else { -+ compile_error!("unsupported target_pointer_width"); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/llvm.rs b/third_party/rust/packed_simd/src/codegen/llvm.rs -new file mode 100644 -index 000000000000..91c2b0758dcf ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/llvm.rs -@@ -0,0 +1,99 @@ -+//! LLVM's platform intrinsics -+#![allow(dead_code)] -+ -+use crate::sealed::Shuffle; -+#[allow(unused_imports)] // FIXME: spurious warning? -+use crate::sealed::Simd; -+ -+// Shuffle intrinsics: expanded in users' crates, therefore public. -+extern "platform-intrinsic" { -+ // FIXME: Passing this intrinsics an `idx` array with an index that is -+ // out-of-bounds will produce a monomorphization-time error. 
-+ // https://github.com/rust-lang-nursery/packed_simd/issues/21 -+ pub fn simd_shuffle2(x: T, y: T, idx: [u32; 2]) -> U -+ where -+ T: Simd, -+ ::Element: Shuffle<[u32; 2], Output = U>; -+ -+ pub fn simd_shuffle4(x: T, y: T, idx: [u32; 4]) -> U -+ where -+ T: Simd, -+ ::Element: Shuffle<[u32; 4], Output = U>; -+ -+ pub fn simd_shuffle8(x: T, y: T, idx: [u32; 8]) -> U -+ where -+ T: Simd, -+ ::Element: Shuffle<[u32; 8], Output = U>; -+ -+ pub fn simd_shuffle16(x: T, y: T, idx: [u32; 16]) -> U -+ where -+ T: Simd, -+ ::Element: Shuffle<[u32; 16], Output = U>; -+ -+ pub fn simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U -+ where -+ T: Simd, -+ ::Element: Shuffle<[u32; 32], Output = U>; -+ -+ pub fn simd_shuffle64(x: T, y: T, idx: [u32; 64]) -> U -+ where -+ T: Simd, -+ ::Element: Shuffle<[u32; 64], Output = U>; -+} -+ -+pub use self::simd_shuffle16 as __shuffle_vector16; -+pub use self::simd_shuffle2 as __shuffle_vector2; -+pub use self::simd_shuffle32 as __shuffle_vector32; -+pub use self::simd_shuffle4 as __shuffle_vector4; -+pub use self::simd_shuffle64 as __shuffle_vector64; -+pub use self::simd_shuffle8 as __shuffle_vector8; -+ -+extern "platform-intrinsic" { -+ crate fn simd_eq(x: T, y: T) -> U; -+ crate fn simd_ne(x: T, y: T) -> U; -+ crate fn simd_lt(x: T, y: T) -> U; -+ crate fn simd_le(x: T, y: T) -> U; -+ crate fn simd_gt(x: T, y: T) -> U; -+ crate fn simd_ge(x: T, y: T) -> U; -+ -+ crate fn simd_insert(x: T, idx: u32, val: U) -> T; -+ crate fn simd_extract(x: T, idx: u32) -> U; -+ -+ crate fn simd_cast(x: T) -> U; -+ -+ crate fn simd_add(x: T, y: T) -> T; -+ crate fn simd_sub(x: T, y: T) -> T; -+ crate fn simd_mul(x: T, y: T) -> T; -+ crate fn simd_div(x: T, y: T) -> T; -+ crate fn simd_rem(x: T, y: T) -> T; -+ crate fn simd_shl(x: T, y: T) -> T; -+ crate fn simd_shr(x: T, y: T) -> T; -+ crate fn simd_and(x: T, y: T) -> T; -+ crate fn simd_or(x: T, y: T) -> T; -+ crate fn simd_xor(x: T, y: T) -> T; -+ -+ crate fn simd_reduce_add_unordered(x: T) -> U; -+ 
crate fn simd_reduce_mul_unordered(x: T) -> U; -+ crate fn simd_reduce_add_ordered(x: T, acc: U) -> U; -+ crate fn simd_reduce_mul_ordered(x: T, acc: U) -> U; -+ crate fn simd_reduce_min(x: T) -> U; -+ crate fn simd_reduce_max(x: T) -> U; -+ crate fn simd_reduce_min_nanless(x: T) -> U; -+ crate fn simd_reduce_max_nanless(x: T) -> U; -+ crate fn simd_reduce_and(x: T) -> U; -+ crate fn simd_reduce_or(x: T) -> U; -+ crate fn simd_reduce_xor(x: T) -> U; -+ crate fn simd_reduce_all(x: T) -> bool; -+ crate fn simd_reduce_any(x: T) -> bool; -+ -+ crate fn simd_select(m: M, a: T, b: T) -> T; -+ -+ crate fn simd_fmin(a: T, b: T) -> T; -+ crate fn simd_fmax(a: T, b: T) -> T; -+ -+ crate fn simd_fsqrt(a: T) -> T; -+ crate fn simd_fma(a: T, b: T, c: T) -> T; -+ -+ crate fn simd_gather(value: T, pointers: P, mask: M) -> T; -+ crate fn simd_scatter(value: T, pointers: P, mask: M); -+} -diff --git a/third_party/rust/packed_simd/src/codegen/math.rs b/third_party/rust/packed_simd/src/codegen/math.rs -new file mode 100644 -index 000000000000..f3997c7f1135 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math.rs -@@ -0,0 +1,3 @@ -+//! Vertical math operations -+ -+crate mod float; -diff --git a/third_party/rust/packed_simd/src/codegen/math/float.rs b/third_party/rust/packed_simd/src/codegen/math/float.rs -new file mode 100644 -index 000000000000..5e89bf6ae6b0 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float.rs -@@ -0,0 +1,18 @@ -+//! Vertical floating-point math operations. 
-+#![allow(clippy::useless_transmute)] -+ -+#[macro_use] -+crate mod macros; -+crate mod abs; -+crate mod cos; -+crate mod cos_pi; -+crate mod exp; -+crate mod ln; -+crate mod mul_add; -+crate mod mul_adde; -+crate mod powf; -+crate mod sin; -+crate mod sin_cos_pi; -+crate mod sin_pi; -+crate mod sqrt; -+crate mod sqrte; -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/abs.rs b/third_party/rust/packed_simd/src/codegen/math/float/abs.rs -new file mode 100644 -index 000000000000..bc4421f61de2 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/abs.rs -@@ -0,0 +1,103 @@ -+//! Vertical floating-point `fabs` -+#![allow(unused)] -+ -+// FIXME 64-bit 1 elem vectors fabs -+ -+use crate::*; -+ -+crate trait Abs { -+ fn abs(self) -> Self; -+} -+ -+#[allow(improper_ctypes)] -+extern "C" { -+ #[link_name = "llvm.fabs.v2f32"] -+ fn fabs_v2f32(x: f32x2) -> f32x2; -+ #[link_name = "llvm.fabs.v4f32"] -+ fn fabs_v4f32(x: f32x4) -> f32x4; -+ #[link_name = "llvm.fabs.v8f32"] -+ fn fabs_v8f32(x: f32x8) -> f32x8; -+ #[link_name = "llvm.fabs.v16f32"] -+ fn fabs_v16f32(x: f32x16) -> f32x16; -+ /* FIXME 64-bit fabsgle elem vectors -+ #[link_name = "llvm.fabs.v1f64"] -+ fn fabs_v1f64(x: f64x1) -> f64x1; -+ */ -+ #[link_name = "llvm.fabs.v2f64"] -+ fn fabs_v2f64(x: f64x2) -> f64x2; -+ #[link_name = "llvm.fabs.v4f64"] -+ fn fabs_v4f64(x: f64x4) -> f64x4; -+ #[link_name = "llvm.fabs.v8f64"] -+ fn fabs_v8f64(x: f64x8) -> f64x8; -+ -+ #[link_name = "llvm.fabs.f32"] -+ fn fabs_f32(x: f32) -> f32; -+ #[link_name = "llvm.fabs.f64"] -+ fn fabs_f64(x: f64) -> f64; -+} -+ -+gen_unary_impl_table!(Abs, abs); -+ -+cfg_if! 
{ -+ if #[cfg(target_arch = "s390x")] { -+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 -+ impl_unary!(f32x2[f32; 2]: fabs_f32); -+ impl_unary!(f32x4[f32; 4]: fabs_f32); -+ impl_unary!(f32x8[f32; 8]: fabs_f32); -+ impl_unary!(f32x16[f32; 16]: fabs_f32); -+ -+ impl_unary!(f64x2[f64; 2]: fabs_f64); -+ impl_unary!(f64x4[f64; 4]: fabs_f64); -+ impl_unary!(f64x8[f64; 8]: fabs_f64); -+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { -+ use sleef_sys::*; -+ cfg_if! { -+ if #[cfg(target_feature = "avx2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_avx2128); -+ impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx2); -+ impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx2); -+ -+ impl_unary!(f32x4: Sleef_fabsf4_avx2128); -+ impl_unary!(f32x8: Sleef_fabsf8_avx2); -+ impl_unary!(f64x2: Sleef_fabsd2_avx2128); -+ impl_unary!(f64x4: Sleef_fabsd4_avx2); -+ } else if #[cfg(target_feature = "avx")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4); -+ impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx); -+ impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx); -+ -+ impl_unary!(f32x4: Sleef_fabsf4_sse4); -+ impl_unary!(f32x8: Sleef_fabsf8_avx); -+ impl_unary!(f64x2: Sleef_fabsd2_sse4); -+ impl_unary!(f64x4: Sleef_fabsd4_avx); -+ } else if #[cfg(target_feature = "sse4.2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4); -+ impl_unary!(f32x16[q => f32x4]: Sleef_fabsf4_sse4); -+ impl_unary!(f64x8[q => f64x2]: Sleef_fabsd2_sse4); -+ -+ impl_unary!(f32x4: Sleef_fabsf4_sse4); -+ impl_unary!(f32x8[h => f32x4]: Sleef_fabsf4_sse4); -+ impl_unary!(f64x2: Sleef_fabsd2_sse4); -+ impl_unary!(f64x4[h => f64x2]: Sleef_fabsd2_sse4); -+ } else { -+ impl_unary!(f32x2[f32; 2]: fabs_f32); -+ impl_unary!(f32x16: fabs_v16f32); -+ impl_unary!(f64x8: fabs_v8f64); -+ -+ impl_unary!(f32x4: fabs_v4f32); -+ impl_unary!(f32x8: fabs_v8f32); -+ impl_unary!(f64x2: fabs_v2f64); -+ impl_unary!(f64x4: fabs_v4f64); -+ } -+ } -+ } else { -+ impl_unary!(f32x2[f32; 2]: 
fabs_f32); -+ impl_unary!(f32x4: fabs_v4f32); -+ impl_unary!(f32x8: fabs_v8f32); -+ impl_unary!(f32x16: fabs_v16f32); -+ -+ impl_unary!(f64x2: fabs_v2f64); -+ impl_unary!(f64x4: fabs_v4f64); -+ impl_unary!(f64x8: fabs_v8f64); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/cos.rs b/third_party/rust/packed_simd/src/codegen/math/float/cos.rs -new file mode 100644 -index 000000000000..50f6c16da255 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/cos.rs -@@ -0,0 +1,103 @@ -+//! Vertical floating-point `cos` -+#![allow(unused)] -+ -+// FIXME 64-bit 1 elem vector cos -+ -+use crate::*; -+ -+crate trait Cos { -+ fn cos(self) -> Self; -+} -+ -+#[allow(improper_ctypes)] -+extern "C" { -+ #[link_name = "llvm.cos.v2f32"] -+ fn cos_v2f32(x: f32x2) -> f32x2; -+ #[link_name = "llvm.cos.v4f32"] -+ fn cos_v4f32(x: f32x4) -> f32x4; -+ #[link_name = "llvm.cos.v8f32"] -+ fn cos_v8f32(x: f32x8) -> f32x8; -+ #[link_name = "llvm.cos.v16f32"] -+ fn cos_v16f32(x: f32x16) -> f32x16; -+ /* FIXME 64-bit cosgle elem vectors -+ #[link_name = "llvm.cos.v1f64"] -+ fn cos_v1f64(x: f64x1) -> f64x1; -+ */ -+ #[link_name = "llvm.cos.v2f64"] -+ fn cos_v2f64(x: f64x2) -> f64x2; -+ #[link_name = "llvm.cos.v4f64"] -+ fn cos_v4f64(x: f64x4) -> f64x4; -+ #[link_name = "llvm.cos.v8f64"] -+ fn cos_v8f64(x: f64x8) -> f64x8; -+ -+ #[link_name = "llvm.cos.f32"] -+ fn cos_f32(x: f32) -> f32; -+ #[link_name = "llvm.cos.f64"] -+ fn cos_f64(x: f64) -> f64; -+} -+ -+gen_unary_impl_table!(Cos, cos); -+ -+cfg_if! 
{ -+ if #[cfg(target_arch = "s390x")] { -+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 -+ impl_unary!(f32x2[f32; 2]: cos_f32); -+ impl_unary!(f32x4[f32; 4]: cos_f32); -+ impl_unary!(f32x8[f32; 8]: cos_f32); -+ impl_unary!(f32x16[f32; 16]: cos_f32); -+ -+ impl_unary!(f64x2[f64; 2]: cos_f64); -+ impl_unary!(f64x4[f64; 4]: cos_f64); -+ impl_unary!(f64x8[f64; 8]: cos_f64); -+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { -+ use sleef_sys::*; -+ cfg_if! { -+ if #[cfg(target_feature = "avx2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10avx2128); -+ impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx2); -+ impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx2); -+ -+ impl_unary!(f32x4: Sleef_cosf4_u10avx2128); -+ impl_unary!(f32x8: Sleef_cosf8_u10avx2); -+ impl_unary!(f64x2: Sleef_cosd2_u10avx2128); -+ impl_unary!(f64x4: Sleef_cosd4_u10avx2); -+ } else if #[cfg(target_feature = "avx")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4); -+ impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx); -+ impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx); -+ -+ impl_unary!(f32x4: Sleef_cosf4_u10sse4); -+ impl_unary!(f32x8: Sleef_cosf8_u10avx); -+ impl_unary!(f64x2: Sleef_cosd2_u10sse4); -+ impl_unary!(f64x4: Sleef_cosd4_u10avx); -+ } else if #[cfg(target_feature = "sse4.2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4); -+ impl_unary!(f32x16[q => f32x4]: Sleef_cosf4_u10sse4); -+ impl_unary!(f64x8[q => f64x2]: Sleef_cosd2_u10sse4); -+ -+ impl_unary!(f32x4: Sleef_cosf4_u10sse4); -+ impl_unary!(f32x8[h => f32x4]: Sleef_cosf4_u10sse4); -+ impl_unary!(f64x2: Sleef_cosd2_u10sse4); -+ impl_unary!(f64x4[h => f64x2]: Sleef_cosd2_u10sse4); -+ } else { -+ impl_unary!(f32x2[f32; 2]: cos_f32); -+ impl_unary!(f32x16: cos_v16f32); -+ impl_unary!(f64x8: cos_v8f64); -+ -+ impl_unary!(f32x4: cos_v4f32); -+ impl_unary!(f32x8: cos_v8f32); -+ impl_unary!(f64x2: cos_v2f64); -+ impl_unary!(f64x4: cos_v4f64); -+ } -+ } -+ } else { -+ 
impl_unary!(f32x2[f32; 2]: cos_f32); -+ impl_unary!(f32x4: cos_v4f32); -+ impl_unary!(f32x8: cos_v8f32); -+ impl_unary!(f32x16: cos_v16f32); -+ -+ impl_unary!(f64x2: cos_v2f64); -+ impl_unary!(f64x4: cos_v4f64); -+ impl_unary!(f64x8: cos_v8f64); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs -new file mode 100644 -index 000000000000..ebff5fd1c751 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs -@@ -0,0 +1,87 @@ -+//! Vertical floating-point `cos` -+#![allow(unused)] -+ -+// FIXME 64-bit 1 elem vectors cos_pi -+ -+use crate::*; -+ -+crate trait CosPi { -+ fn cos_pi(self) -> Self; -+} -+ -+gen_unary_impl_table!(CosPi, cos_pi); -+ -+macro_rules! impl_def { -+ ($vid:ident, $PI:path) => { -+ impl CosPi for $vid { -+ #[inline] -+ fn cos_pi(self) -> Self { -+ (self * Self::splat($PI)).cos() -+ } -+ } -+ }; -+} -+macro_rules! impl_def32 { -+ ($vid:ident) => { -+ impl_def!($vid, crate::f32::consts::PI); -+ }; -+} -+macro_rules! impl_def64 { -+ ($vid:ident) => { -+ impl_def!($vid, crate::f64::consts::PI); -+ }; -+} -+ -+cfg_if! { -+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { -+ use sleef_sys::*; -+ cfg_if! 
{ -+ if #[cfg(target_feature = "avx2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05avx2128); -+ impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx2); -+ impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx2); -+ -+ impl_unary!(f32x4: Sleef_cospif4_u05avx2128); -+ impl_unary!(f32x8: Sleef_cospif8_u05avx2); -+ impl_unary!(f64x2: Sleef_cospid2_u05avx2128); -+ impl_unary!(f64x4: Sleef_cospid4_u05avx2); -+ } else if #[cfg(target_feature = "avx")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4); -+ impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx); -+ impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx); -+ -+ impl_unary!(f32x4: Sleef_cospif4_u05sse4); -+ impl_unary!(f32x8: Sleef_cospif8_u05avx); -+ impl_unary!(f64x2: Sleef_cospid2_u05sse4); -+ impl_unary!(f64x4: Sleef_cospid4_u05avx); -+ } else if #[cfg(target_feature = "sse4.2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4); -+ impl_unary!(f32x16[q => f32x4]: Sleef_cospif4_u05sse4); -+ impl_unary!(f64x8[q => f64x2]: Sleef_cospid2_u05sse4); -+ -+ impl_unary!(f32x4: Sleef_cospif4_u05sse4); -+ impl_unary!(f32x8[h => f32x4]: Sleef_cospif4_u05sse4); -+ impl_unary!(f64x2: Sleef_cospid2_u05sse4); -+ impl_unary!(f64x4[h => f64x2]: Sleef_cospid2_u05sse4); -+ } else { -+ impl_def32!(f32x2); -+ impl_def32!(f32x4); -+ impl_def32!(f32x8); -+ impl_def32!(f32x16); -+ -+ impl_def64!(f64x2); -+ impl_def64!(f64x4); -+ impl_def64!(f64x8); -+ } -+ } -+ } else { -+ impl_def32!(f32x2); -+ impl_def32!(f32x4); -+ impl_def32!(f32x8); -+ impl_def32!(f32x16); -+ -+ impl_def64!(f64x2); -+ impl_def64!(f64x4); -+ impl_def64!(f64x8); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/exp.rs b/third_party/rust/packed_simd/src/codegen/math/float/exp.rs -new file mode 100644 -index 000000000000..00d10e9fa644 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/exp.rs -@@ -0,0 +1,112 @@ -+//! 
Vertical floating-point `exp` -+#![allow(unused)] -+ -+// FIXME 64-bit expgle elem vectors misexpg -+ -+use crate::*; -+ -+crate trait Exp { -+ fn exp(self) -> Self; -+} -+ -+#[allow(improper_ctypes)] -+extern "C" { -+ #[link_name = "llvm.exp.v2f32"] -+ fn exp_v2f32(x: f32x2) -> f32x2; -+ #[link_name = "llvm.exp.v4f32"] -+ fn exp_v4f32(x: f32x4) -> f32x4; -+ #[link_name = "llvm.exp.v8f32"] -+ fn exp_v8f32(x: f32x8) -> f32x8; -+ #[link_name = "llvm.exp.v16f32"] -+ fn exp_v16f32(x: f32x16) -> f32x16; -+ /* FIXME 64-bit expgle elem vectors -+ #[link_name = "llvm.exp.v1f64"] -+ fn exp_v1f64(x: f64x1) -> f64x1; -+ */ -+ #[link_name = "llvm.exp.v2f64"] -+ fn exp_v2f64(x: f64x2) -> f64x2; -+ #[link_name = "llvm.exp.v4f64"] -+ fn exp_v4f64(x: f64x4) -> f64x4; -+ #[link_name = "llvm.exp.v8f64"] -+ fn exp_v8f64(x: f64x8) -> f64x8; -+ -+ #[link_name = "llvm.exp.f32"] -+ fn exp_f32(x: f32) -> f32; -+ #[link_name = "llvm.exp.f64"] -+ fn exp_f64(x: f64) -> f64; -+} -+ -+gen_unary_impl_table!(Exp, exp); -+ -+cfg_if! { -+ if #[cfg(target_arch = "s390x")] { -+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 -+ impl_unary!(f32x2[f32; 2]: exp_f32); -+ impl_unary!(f32x4[f32; 4]: exp_f32); -+ impl_unary!(f32x8[f32; 8]: exp_f32); -+ impl_unary!(f32x16[f32; 16]: exp_f32); -+ -+ impl_unary!(f64x2[f64; 2]: exp_f64); -+ impl_unary!(f64x4[f64; 4]: exp_f64); -+ impl_unary!(f64x8[f64; 8]: exp_f64); -+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { -+ use sleef_sys::*; -+ cfg_if! 
{ -+ if #[cfg(target_feature = "avx2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10avx2128); -+ impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx2); -+ impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx2); -+ -+ impl_unary!(f32x4: Sleef_expf4_u10avx2128); -+ impl_unary!(f32x8: Sleef_expf8_u10avx2); -+ impl_unary!(f64x2: Sleef_expd2_u10avx2128); -+ impl_unary!(f64x4: Sleef_expd4_u10avx2); -+ } else if #[cfg(target_feature = "avx")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); -+ impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx); -+ impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx); -+ -+ impl_unary!(f32x4: Sleef_expf4_u10sse4); -+ impl_unary!(f32x8: Sleef_expf8_u10avx); -+ impl_unary!(f64x2: Sleef_expd2_u10sse4); -+ impl_unary!(f64x4: Sleef_expd4_u10avx); -+ } else if #[cfg(target_feature = "sse4.2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); -+ impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse4); -+ impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse4); -+ -+ impl_unary!(f32x4: Sleef_expf4_u10sse4); -+ impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse4); -+ impl_unary!(f64x2: Sleef_expd2_u10sse4); -+ impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse4); -+ } else if #[cfg(target_feature = "sse2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse2); -+ impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse2); -+ impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse2); -+ -+ impl_unary!(f32x4: Sleef_expf4_u10sse2); -+ impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse2); -+ impl_unary!(f64x2: Sleef_expd2_u10sse2); -+ impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse2); -+ } else { -+ impl_unary!(f32x2[f32; 2]: exp_f32); -+ impl_unary!(f32x16: exp_v16f32); -+ impl_unary!(f64x8: exp_v8f64); -+ -+ impl_unary!(f32x4: exp_v4f32); -+ impl_unary!(f32x8: exp_v8f32); -+ impl_unary!(f64x2: exp_v2f64); -+ impl_unary!(f64x4: exp_v4f64); -+ } -+ } -+ } else { -+ impl_unary!(f32x2[f32; 2]: exp_f32); -+ impl_unary!(f32x4: exp_v4f32); -+ 
impl_unary!(f32x8: exp_v8f32); -+ impl_unary!(f32x16: exp_v16f32); -+ -+ impl_unary!(f64x2: exp_v2f64); -+ impl_unary!(f64x4: exp_v4f64); -+ impl_unary!(f64x8: exp_v8f64); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/ln.rs b/third_party/rust/packed_simd/src/codegen/math/float/ln.rs -new file mode 100644 -index 000000000000..88a5a6c6c158 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/ln.rs -@@ -0,0 +1,112 @@ -+//! Vertical floating-point `ln` -+#![allow(unused)] -+ -+// FIXME 64-bit lngle elem vectors mislng -+ -+use crate::*; -+ -+crate trait Ln { -+ fn ln(self) -> Self; -+} -+ -+#[allow(improper_ctypes)] -+extern "C" { -+ #[link_name = "llvm.log.v2f32"] -+ fn ln_v2f32(x: f32x2) -> f32x2; -+ #[link_name = "llvm.log.v4f32"] -+ fn ln_v4f32(x: f32x4) -> f32x4; -+ #[link_name = "llvm.log.v8f32"] -+ fn ln_v8f32(x: f32x8) -> f32x8; -+ #[link_name = "llvm.log.v16f32"] -+ fn ln_v16f32(x: f32x16) -> f32x16; -+ /* FIXME 64-bit lngle elem vectors -+ #[link_name = "llvm.log.v1f64"] -+ fn ln_v1f64(x: f64x1) -> f64x1; -+ */ -+ #[link_name = "llvm.log.v2f64"] -+ fn ln_v2f64(x: f64x2) -> f64x2; -+ #[link_name = "llvm.log.v4f64"] -+ fn ln_v4f64(x: f64x4) -> f64x4; -+ #[link_name = "llvm.log.v8f64"] -+ fn ln_v8f64(x: f64x8) -> f64x8; -+ -+ #[link_name = "llvm.log.f32"] -+ fn ln_f32(x: f32) -> f32; -+ #[link_name = "llvm.log.f64"] -+ fn ln_f64(x: f64) -> f64; -+} -+ -+gen_unary_impl_table!(Ln, ln); -+ -+cfg_if! { -+ if #[cfg(target_arch = "s390x")] { -+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 -+ impl_unary!(f32x2[f32; 2]: ln_f32); -+ impl_unary!(f32x4[f32; 4]: ln_f32); -+ impl_unary!(f32x8[f32; 8]: ln_f32); -+ impl_unary!(f32x16[f32; 16]: ln_f32); -+ -+ impl_unary!(f64x2[f64; 2]: ln_f64); -+ impl_unary!(f64x4[f64; 4]: ln_f64); -+ impl_unary!(f64x8[f64; 8]: ln_f64); -+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { -+ use sleef_sys::*; -+ cfg_if! 
{ -+ if #[cfg(target_feature = "avx2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10avx2128); -+ impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx2); -+ impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx2); -+ -+ impl_unary!(f32x4: Sleef_logf4_u10avx2128); -+ impl_unary!(f32x8: Sleef_logf8_u10avx2); -+ impl_unary!(f64x2: Sleef_logd2_u10avx2128); -+ impl_unary!(f64x4: Sleef_logd4_u10avx2); -+ } else if #[cfg(target_feature = "avx")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4); -+ impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx); -+ impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx); -+ -+ impl_unary!(f32x4: Sleef_logf4_u10sse4); -+ impl_unary!(f32x8: Sleef_logf8_u10avx); -+ impl_unary!(f64x2: Sleef_logd2_u10sse4); -+ impl_unary!(f64x4: Sleef_logd4_u10avx); -+ } else if #[cfg(target_feature = "sse4.2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4); -+ impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse4); -+ impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse4); -+ -+ impl_unary!(f32x4: Sleef_logf4_u10sse4); -+ impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse4); -+ impl_unary!(f64x2: Sleef_logd2_u10sse4); -+ impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse4); -+ } else if #[cfg(target_feature = "sse2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse2); -+ impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse2); -+ impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse2); -+ -+ impl_unary!(f32x4: Sleef_logf4_u10sse2); -+ impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse2); -+ impl_unary!(f64x2: Sleef_logd2_u10sse2); -+ impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse2); -+ } else { -+ impl_unary!(f32x2[f32; 2]: ln_f32); -+ impl_unary!(f32x16: ln_v16f32); -+ impl_unary!(f64x8: ln_v8f64); -+ -+ impl_unary!(f32x4: ln_v4f32); -+ impl_unary!(f32x8: ln_v8f32); -+ impl_unary!(f64x2: ln_v2f64); -+ impl_unary!(f64x4: ln_v4f64); -+ } -+ } -+ } else { -+ impl_unary!(f32x2[f32; 2]: ln_f32); -+ impl_unary!(f32x4: ln_v4f32); -+ impl_unary!(f32x8: 
ln_v8f32); -+ impl_unary!(f32x16: ln_v16f32); -+ -+ impl_unary!(f64x2: ln_v2f64); -+ impl_unary!(f64x4: ln_v4f64); -+ impl_unary!(f64x8: ln_v8f64); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/macros.rs b/third_party/rust/packed_simd/src/codegen/math/float/macros.rs -new file mode 100644 -index 000000000000..02d0ca3f5c7a ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/macros.rs -@@ -0,0 +1,559 @@ -+//! Utility macros -+#![allow(unused)] -+ -+ -+macro_rules! impl_unary_ { -+ // implementation mapping 1:1 -+ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, -+ $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self) -> Self { -+ unsafe { -+ use crate::mem::transmute; -+ transmute($fun(transmute(self))) -+ } -+ } -+ } -+ }; -+ // implementation mapping 1:1 for when `$fun` is a generic function -+ // like some of the fp math rustc intrinsics (e.g. `fn fun(x: T) -> T`). -+ (gen | $trait_id:ident, $trait_method:ident, $vec_id:ident, -+ $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self) -> Self { -+ unsafe { -+ use crate::mem::transmute; -+ transmute($fun(self.0)) -+ } -+ } -+ } -+ }; -+ (scalar | $trait_id:ident, $trait_method:ident, -+ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self) -> Self { -+ unsafe { -+ union U { -+ vec: $vec_id, -+ scalars: [$sid; $scount], -+ } -+ let mut scalars = U { vec: self }.scalars; -+ for i in &mut scalars { -+ *i = $fun(*i); -+ } -+ U { scalars }.vec -+ } -+ } -+ } -+ }; -+ // implementation calling fun twice on each of the vector halves: -+ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, -+ $vech_id:ident, $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self) -> Self { -+ unsafe { -+ use crate::mem::transmute; -+ union U { -+ vec: $vec_id, -+ halves: [$vech_id; 2], -+ } -+ -+ 
let mut halves = U { vec: self }.halves; -+ -+ *halves.get_unchecked_mut(0) = -+ transmute($fun(transmute(*halves.get_unchecked(0)))); -+ *halves.get_unchecked_mut(1) = -+ transmute($fun(transmute(*halves.get_unchecked(1)))); -+ -+ U { halves }.vec -+ } -+ } -+ } -+ }; -+ // implementation calling fun four times on each of the vector quarters: -+ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, -+ $vecq_id:ident, $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self) -> Self { -+ unsafe { -+ use crate::mem::transmute; -+ union U { -+ vec: $vec_id, -+ quarters: [$vecq_id; 4], -+ } -+ -+ let mut quarters = U { vec: self }.quarters; -+ -+ *quarters.get_unchecked_mut(0) = -+ transmute($fun(transmute(*quarters.get_unchecked(0)))); -+ *quarters.get_unchecked_mut(1) = -+ transmute($fun(transmute(*quarters.get_unchecked(1)))); -+ *quarters.get_unchecked_mut(2) = -+ transmute($fun(transmute(*quarters.get_unchecked(2)))); -+ *quarters.get_unchecked_mut(3) = -+ transmute($fun(transmute(*quarters.get_unchecked(3)))); -+ -+ U { quarters }.vec -+ } -+ } -+ } -+ }; -+ // implementation calling fun once on a vector twice as large: -+ (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, -+ $vect_id:ident, $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self) -> Self { -+ unsafe { -+ use crate::mem::{transmute, uninitialized}; -+ -+ union U { -+ vec: [$vec_id; 2], -+ twice: $vect_id, -+ } -+ -+ let twice = U { vec: [self, uninitialized()] }.twice; -+ let twice = transmute($fun(transmute(twice))); -+ -+ *(U { twice }.vec.get_unchecked(0)) -+ } -+ } -+ } -+ }; -+} -+ -+macro_rules! gen_unary_impl_table { -+ ($trait_id:ident, $trait_method:ident) => { -+ macro_rules! 
impl_unary { -+ ($vid:ident: $fun:ident) => { -+ impl_unary_!(vec | $trait_id, $trait_method, $vid, $fun); -+ }; -+ ($vid:ident[g]: $fun:ident) => { -+ impl_unary_!(gen | $trait_id, $trait_method, $vid, $fun); -+ }; -+ ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { -+ impl_unary_!( -+ scalar | $trait_id, -+ $trait_method, -+ $vid, -+ [$sid; $sc], -+ $fun -+ ); -+ }; -+ ($vid:ident[s]: $fun:ident) => { -+ impl_unary_!(scalar | $trait_id, $trait_method, $vid, $fun); -+ }; -+ ($vid:ident[h => $vid_h:ident]: $fun:ident) => { -+ impl_unary_!( -+ halves | $trait_id, -+ $trait_method, -+ $vid, -+ $vid_h, -+ $fun -+ ); -+ }; -+ ($vid:ident[q => $vid_q:ident]: $fun:ident) => { -+ impl_unary_!( -+ quarter | $trait_id, -+ $trait_method, -+ $vid, -+ $vid_q, -+ $fun -+ ); -+ }; -+ ($vid:ident[t => $vid_t:ident]: $fun:ident) => { -+ impl_unary_!( -+ twice | $trait_id, -+ $trait_method, -+ $vid, -+ $vid_t, -+ $fun -+ ); -+ }; -+ } -+ }; -+} -+ -+macro_rules! impl_tertiary_ { -+ // implementation mapping 1:1 -+ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, -+ $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self, y: Self, z: Self) -> Self { -+ unsafe { -+ use crate::mem::transmute; -+ transmute($fun( -+ transmute(self), -+ transmute(y), -+ transmute(z), -+ )) -+ } -+ } -+ } -+ }; -+ (scalar | $trait_id:ident, $trait_method:ident, -+ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self, y: Self, z: Self) -> Self { -+ unsafe { -+ union U { -+ vec: $vec_id, -+ scalars: [$sid; $scount], -+ } -+ let mut x = U { vec: self }.scalars; -+ let y = U { vec: y }.scalars; -+ let z = U { vec: z }.scalars; -+ for (x, (y, z)) in (&mut scalars).zip(&y).zip(&z) { -+ *i = $fun(*i, *y, *z); -+ } -+ U { vec: x }.vec -+ } -+ } -+ } -+ }; -+ // implementation calling fun twice on each of the vector halves: -+ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, -+ 
$vech_id:ident, $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self, y: Self, z: Self) -> Self { -+ unsafe { -+ use crate::mem::transmute; -+ union U { -+ vec: $vec_id, -+ halves: [$vech_id; 2], -+ } -+ -+ let mut x_halves = U { vec: self }.halves; -+ let y_halves = U { vec: y }.halves; -+ let z_halves = U { vec: z }.halves; -+ -+ *x_halves.get_unchecked_mut(0) = transmute($fun( -+ transmute(*x_halves.get_unchecked(0)), -+ transmute(*y_halves.get_unchecked(0)), -+ transmute(*z_halves.get_unchecked(0)), -+ )); -+ *x_halves.get_unchecked_mut(1) = transmute($fun( -+ transmute(*x_halves.get_unchecked(1)), -+ transmute(*y_halves.get_unchecked(1)), -+ transmute(*z_halves.get_unchecked(1)), -+ )); -+ -+ U { halves: x_halves }.vec -+ } -+ } -+ } -+ }; -+ // implementation calling fun four times on each of the vector quarters: -+ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, -+ $vecq_id:ident, $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self, y: Self, z: Self) -> Self { -+ unsafe { -+ use crate::mem::transmute; -+ union U { -+ vec: $vec_id, -+ quarters: [$vecq_id; 4], -+ } -+ -+ let mut x_quarters = U { vec: self }.quarters; -+ let y_quarters = U { vec: y }.quarters; -+ let z_quarters = U { vec: z }.quarters; -+ -+ *x_quarters.get_unchecked_mut(0) = transmute($fun( -+ transmute(*x_quarters.get_unchecked(0)), -+ transmute(*y_quarters.get_unchecked(0)), -+ transmute(*z_quarters.get_unchecked(0)), -+ )); -+ -+ *x_quarters.get_unchecked_mut(1) = transmute($fun( -+ transmute(*x_quarters.get_unchecked(1)), -+ transmute(*y_quarters.get_unchecked(1)), -+ transmute(*z_quarters.get_unchecked(1)), -+ )); -+ -+ *x_quarters.get_unchecked_mut(2) = transmute($fun( -+ transmute(*x_quarters.get_unchecked(2)), -+ transmute(*y_quarters.get_unchecked(2)), -+ transmute(*z_quarters.get_unchecked(2)), -+ )); -+ -+ *x_quarters.get_unchecked_mut(3) = transmute($fun( -+ transmute(*x_quarters.get_unchecked(3)), 
-+ transmute(*y_quarters.get_unchecked(3)), -+ transmute(*z_quarters.get_unchecked(3)), -+ )); -+ -+ U { quarters: x_quarters }.vec -+ } -+ } -+ } -+ }; -+ // implementation calling fun once on a vector twice as large: -+ (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, -+ $vect_id:ident, $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self, y: Self, z: Self) -> Self { -+ unsafe { -+ use crate::mem::{transmute, uninitialized}; -+ -+ union U { -+ vec: [$vec_id; 2], -+ twice: $vect_id, -+ } -+ -+ let x_twice = U { vec: [self, uninitialized()] }.twice; -+ let y_twice = U { vec: [y, uninitialized()] }.twice; -+ let z_twice = U { vec: [z, uninitialized()] }.twice; -+ let twice: $vect_id = transmute($fun( -+ transmute(x_twice), -+ transmute(y_twice), -+ transmute(z_twice), -+ )); -+ -+ *(U { twice }.vec.get_unchecked(0)) -+ } -+ } -+ } -+ }; -+} -+ -+macro_rules! gen_tertiary_impl_table { -+ ($trait_id:ident, $trait_method:ident) => { -+ macro_rules! impl_tertiary { -+ ($vid:ident: $fun:ident) => { -+ impl_tertiary_!(vec | $trait_id, $trait_method, $vid, $fun); -+ }; -+ ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { -+ impl_tertiary_!( -+ scalar | $trait_id, -+ $trait_method, -+ $vid, -+ [$sid; $sc], -+ $fun -+ ); -+ }; -+ ($vid:ident[s]: $fun:ident) => { -+ impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, $fun); -+ }; -+ ($vid:ident[h => $vid_h:ident]: $fun:ident) => { -+ impl_tertiary_!( -+ halves | $trait_id, -+ $trait_method, -+ $vid, -+ $vid_h, -+ $fun -+ ); -+ }; -+ ($vid:ident[q => $vid_q:ident]: $fun:ident) => { -+ impl_tertiary_!( -+ quarter | $trait_id, -+ $trait_method, -+ $vid, -+ $vid_q, -+ $fun -+ ); -+ }; -+ ($vid:ident[t => $vid_t:ident]: $fun:ident) => { -+ impl_tertiary_!( -+ twice | $trait_id, -+ $trait_method, -+ $vid, -+ $vid_t, -+ $fun -+ ); -+ }; -+ } -+ }; -+} -+ -+macro_rules! 
impl_binary_ { -+ // implementation mapping 1:1 -+ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, -+ $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self, y: Self) -> Self { -+ unsafe { -+ use crate::mem::transmute; -+ transmute($fun(transmute(self), transmute(y))) -+ } -+ } -+ } -+ }; -+ (scalar | $trait_id:ident, $trait_method:ident, -+ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self, y: Self) -> Self { -+ unsafe { -+ union U { -+ vec: $vec_id, -+ scalars: [$sid; $scount], -+ } -+ let mut x = U { vec: self }.scalars; -+ let y = U { vec: y }.scalars; -+ for (x, y) in x.iter_mut().zip(&y) { -+ *x = $fun(*x, *y); -+ } -+ U { scalars: x }.vec -+ } -+ } -+ } -+ }; -+ // implementation calling fun twice on each of the vector halves: -+ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, -+ $vech_id:ident, $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self, y: Self) -> Self { -+ unsafe { -+ use crate::mem::transmute; -+ union U { -+ vec: $vec_id, -+ halves: [$vech_id; 2], -+ } -+ -+ let mut x_halves = U { vec: self }.halves; -+ let y_halves = U { vec: y }.halves; -+ -+ *x_halves.get_unchecked_mut(0) = transmute($fun( -+ transmute(*x_halves.get_unchecked(0)), -+ transmute(*y_halves.get_unchecked(0)), -+ )); -+ *x_halves.get_unchecked_mut(1) = transmute($fun( -+ transmute(*x_halves.get_unchecked(1)), -+ transmute(*y_halves.get_unchecked(1)), -+ )); -+ -+ U { halves: x_halves }.vec -+ } -+ } -+ } -+ }; -+ // implementation calling fun four times on each of the vector quarters: -+ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, -+ $vecq_id:ident, $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self, y: Self) -> Self { -+ unsafe { -+ use crate::mem::transmute; -+ union U { -+ vec: $vec_id, -+ quarters: [$vecq_id; 4], -+ } -+ -+ let mut x_quarters = U { 
vec: self }.quarters; -+ let y_quarters = U { vec: y }.quarters; -+ -+ *x_quarters.get_unchecked_mut(0) = transmute($fun( -+ transmute(*x_quarters.get_unchecked(0)), -+ transmute(*y_quarters.get_unchecked(0)), -+ )); -+ -+ *x_quarters.get_unchecked_mut(1) = transmute($fun( -+ transmute(*x_quarters.get_unchecked(1)), -+ transmute(*y_quarters.get_unchecked(1)), -+ )); -+ -+ *x_quarters.get_unchecked_mut(2) = transmute($fun( -+ transmute(*x_quarters.get_unchecked(2)), -+ transmute(*y_quarters.get_unchecked(2)), -+ )); -+ -+ *x_quarters.get_unchecked_mut(3) = transmute($fun( -+ transmute(*x_quarters.get_unchecked(3)), -+ transmute(*y_quarters.get_unchecked(3)), -+ )); -+ -+ U { quarters: x_quarters }.vec -+ } -+ } -+ } -+ }; -+ // implementation calling fun once on a vector twice as large: -+ (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, -+ $vect_id:ident, $fun:ident) => { -+ impl $trait_id for $vec_id { -+ #[inline] -+ fn $trait_method(self, y: Self) -> Self { -+ unsafe { -+ use crate::mem::{transmute, uninitialized}; -+ -+ union U { -+ vec: [$vec_id; 2], -+ twice: $vect_id, -+ } -+ -+ let x_twice = U { vec: [self, uninitialized()] }.twice; -+ let y_twice = U { vec: [y, uninitialized()] }.twice; -+ let twice: $vect_id = transmute($fun( -+ transmute(x_twice), -+ transmute(y_twice), -+ )); -+ -+ *(U { twice }.vec.get_unchecked(0)) -+ } -+ } -+ } -+ }; -+} -+ -+macro_rules! gen_binary_impl_table { -+ ($trait_id:ident, $trait_method:ident) => { -+ macro_rules! 
impl_binary { -+ ($vid:ident: $fun:ident) => { -+ impl_binary_!(vec | $trait_id, $trait_method, $vid, $fun); -+ }; -+ ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { -+ impl_binary_!( -+ scalar | $trait_id, -+ $trait_method, -+ $vid, -+ [$sid; $sc], -+ $fun -+ ); -+ }; -+ ($vid:ident[s]: $fun:ident) => { -+ impl_binary_!(scalar | $trait_id, $trait_method, $vid, $fun); -+ }; -+ ($vid:ident[h => $vid_h:ident]: $fun:ident) => { -+ impl_binary_!( -+ halves | $trait_id, -+ $trait_method, -+ $vid, -+ $vid_h, -+ $fun -+ ); -+ }; -+ ($vid:ident[q => $vid_q:ident]: $fun:ident) => { -+ impl_binary_!( -+ quarter | $trait_id, -+ $trait_method, -+ $vid, -+ $vid_q, -+ $fun -+ ); -+ }; -+ ($vid:ident[t => $vid_t:ident]: $fun:ident) => { -+ impl_binary_!( -+ twice | $trait_id, -+ $trait_method, -+ $vid, -+ $vid_t, -+ $fun -+ ); -+ }; -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs b/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs -new file mode 100644 -index 000000000000..f48a57dc46c6 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs -@@ -0,0 +1,109 @@ -+//! 
Vertical floating-point `mul_add` -+#![allow(unused)] -+use crate::*; -+ -+// FIXME: 64-bit 1 element mul_add -+ -+crate trait MulAdd { -+ fn mul_add(self, y: Self, z: Self) -> Self; -+} -+ -+#[cfg(not(target_arch = "s390x"))] -+#[allow(improper_ctypes)] -+extern "C" { -+ #[link_name = "llvm.fma.v2f32"] -+ fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2; -+ #[link_name = "llvm.fma.v4f32"] -+ fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4; -+ #[link_name = "llvm.fma.v8f32"] -+ fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8; -+ #[link_name = "llvm.fma.v16f32"] -+ fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16; -+ /* FIXME 64-bit single elem vectors -+ #[link_name = "llvm.fma.v1f64"] -+ fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1; -+ */ -+ #[link_name = "llvm.fma.v2f64"] -+ fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2; -+ #[link_name = "llvm.fma.v4f64"] -+ fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4; -+ #[link_name = "llvm.fma.v8f64"] -+ fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8; -+} -+ -+gen_tertiary_impl_table!(MulAdd, mul_add); -+ -+cfg_if! { -+ if #[cfg(target_arch = "s390x")] { -+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 -+ macro_rules! impl_broken { -+ ($id:ident) => { -+ impl MulAdd for $id { -+ #[inline] -+ fn mul_add(self, y: Self, z: Self) -> Self { -+ self * y + z -+ } -+ } -+ }; -+ } -+ -+ impl_broken!(f32x2); -+ impl_broken!(f32x4); -+ impl_broken!(f32x8); -+ impl_broken!(f32x16); -+ -+ impl_broken!(f64x2); -+ impl_broken!(f64x4); -+ impl_broken!(f64x8); -+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { -+ use sleef_sys::*; -+ cfg_if! 
{ -+ if #[cfg(target_feature = "avx2")] { -+ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_avx2128); -+ impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx2); -+ impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx2); -+ -+ impl_tertiary!(f32x4: Sleef_fmaf4_avx2128); -+ impl_tertiary!(f32x8: Sleef_fmaf8_avx2); -+ impl_tertiary!(f64x2: Sleef_fmad2_avx2128); -+ impl_tertiary!(f64x4: Sleef_fmad4_avx2); -+ } else if #[cfg(target_feature = "avx")] { -+ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4); -+ impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx); -+ impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx); -+ -+ impl_tertiary!(f32x4: Sleef_fmaf4_sse4); -+ impl_tertiary!(f32x8: Sleef_fmaf8_avx); -+ impl_tertiary!(f64x2: Sleef_fmad2_sse4); -+ impl_tertiary!(f64x4: Sleef_fmad4_avx); -+ } else if #[cfg(target_feature = "sse4.2")] { -+ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4); -+ impl_tertiary!(f32x16[q => f32x4]: Sleef_fmaf4_sse4); -+ impl_tertiary!(f64x8[q => f64x2]: Sleef_fmad2_sse4); -+ -+ impl_tertiary!(f32x4: Sleef_fmaf4_sse4); -+ impl_tertiary!(f32x8[h => f32x4]: Sleef_fmaf4_sse4); -+ impl_tertiary!(f64x2: Sleef_fmad2_sse4); -+ impl_tertiary!(f64x4[h => f64x2]: Sleef_fmad2_sse4); -+ } else { -+ impl_tertiary!(f32x2: fma_v2f32); -+ impl_tertiary!(f32x16: fma_v16f32); -+ impl_tertiary!(f64x8: fma_v8f64); -+ -+ impl_tertiary!(f32x4: fma_v4f32); -+ impl_tertiary!(f32x8: fma_v8f32); -+ impl_tertiary!(f64x2: fma_v2f64); -+ impl_tertiary!(f64x4: fma_v4f64); -+ } -+ } -+ } else { -+ impl_tertiary!(f32x2: fma_v2f32); -+ impl_tertiary!(f32x4: fma_v4f32); -+ impl_tertiary!(f32x8: fma_v8f32); -+ impl_tertiary!(f32x16: fma_v16f32); -+ // impl_tertiary!(f64x1: fma_v1f64); // FIXME 64-bit fmagle elem vectors -+ impl_tertiary!(f64x2: fma_v2f64); -+ impl_tertiary!(f64x4: fma_v4f64); -+ impl_tertiary!(f64x8: fma_v8f64); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs b/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs 
-new file mode 100644 -index 000000000000..8c41fb131d94 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs -@@ -0,0 +1,66 @@ -+//! Approximation for floating-point `mul_add` -+use crate::*; -+ -+// FIXME: 64-bit 1 element mul_adde -+ -+crate trait MulAddE { -+ fn mul_adde(self, y: Self, z: Self) -> Self; -+} -+ -+#[cfg(not(target_arch = "s390x"))] -+#[allow(improper_ctypes)] -+extern "C" { -+ #[link_name = "llvm.fmuladd.v2f32"] -+ fn fmuladd_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2; -+ #[link_name = "llvm.fmuladd.v4f32"] -+ fn fmuladd_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4; -+ #[link_name = "llvm.fmuladd.v8f32"] -+ fn fmuladd_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8; -+ #[link_name = "llvm.fmuladd.v16f32"] -+ fn fmuladd_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16; -+ /* FIXME 64-bit single elem vectors -+ #[link_name = "llvm.fmuladd.v1f64"] -+ fn fmuladd_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1; -+ */ -+ #[link_name = "llvm.fmuladd.v2f64"] -+ fn fmuladd_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2; -+ #[link_name = "llvm.fmuladd.v4f64"] -+ fn fmuladd_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4; -+ #[link_name = "llvm.fmuladd.v8f64"] -+ fn fmuladd_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8; -+} -+ -+macro_rules! 
impl_mul_adde { -+ ($id:ident : $fn:ident) => { -+ impl MulAddE for $id { -+ #[inline] -+ fn mul_adde(self, y: Self, z: Self) -> Self { -+ #[cfg(not(target_arch = "s390x"))] -+ { -+ use crate::mem::transmute; -+ unsafe { -+ transmute($fn( -+ transmute(self), -+ transmute(y), -+ transmute(z), -+ )) -+ } -+ } -+ #[cfg(target_arch = "s390x")] -+ { -+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 -+ self * y + z -+ } -+ } -+ } -+ }; -+} -+ -+impl_mul_adde!(f32x2: fmuladd_v2f32); -+impl_mul_adde!(f32x4: fmuladd_v4f32); -+impl_mul_adde!(f32x8: fmuladd_v8f32); -+impl_mul_adde!(f32x16: fmuladd_v16f32); -+// impl_mul_adde!(f64x1: fma_v1f64); // FIXME 64-bit fmagle elem vectors -+impl_mul_adde!(f64x2: fmuladd_v2f64); -+impl_mul_adde!(f64x4: fmuladd_v4f64); -+impl_mul_adde!(f64x8: fmuladd_v8f64); -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/powf.rs b/third_party/rust/packed_simd/src/codegen/math/float/powf.rs -new file mode 100644 -index 000000000000..bc15067d73a3 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/powf.rs -@@ -0,0 +1,112 @@ -+//! 
Vertical floating-point `powf` -+#![allow(unused)] -+ -+// FIXME 64-bit powfgle elem vectors mispowfg -+ -+use crate::*; -+ -+crate trait Powf { -+ fn powf(self, x: Self) -> Self; -+} -+ -+#[allow(improper_ctypes)] -+extern "C" { -+ #[link_name = "llvm.pow.v2f32"] -+ fn powf_v2f32(x: f32x2, y: f32x2) -> f32x2; -+ #[link_name = "llvm.pow.v4f32"] -+ fn powf_v4f32(x: f32x4, y: f32x4) -> f32x4; -+ #[link_name = "llvm.pow.v8f32"] -+ fn powf_v8f32(x: f32x8, y: f32x8) -> f32x8; -+ #[link_name = "llvm.pow.v16f32"] -+ fn powf_v16f32(x: f32x16, y: f32x16) -> f32x16; -+ /* FIXME 64-bit powfgle elem vectors -+ #[link_name = "llvm.pow.v1f64"] -+ fn powf_v1f64(x: f64x1, y: f64x1) -> f64x1; -+ */ -+ #[link_name = "llvm.pow.v2f64"] -+ fn powf_v2f64(x: f64x2, y: f64x2) -> f64x2; -+ #[link_name = "llvm.pow.v4f64"] -+ fn powf_v4f64(x: f64x4, y: f64x4) -> f64x4; -+ #[link_name = "llvm.pow.v8f64"] -+ fn powf_v8f64(x: f64x8, y: f64x8) -> f64x8; -+ -+ #[link_name = "llvm.pow.f32"] -+ fn powf_f32(x: f32, y: f32) -> f32; -+ #[link_name = "llvm.pow.f64"] -+ fn powf_f64(x: f64, y: f64) -> f64; -+} -+ -+gen_binary_impl_table!(Powf, powf); -+ -+cfg_if! { -+ if #[cfg(target_arch = "s390x")] { -+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 -+ impl_binary!(f32x2[f32; 2]: powf_f32); -+ impl_binary!(f32x4[f32; 4]: powf_f32); -+ impl_binary!(f32x8[f32; 8]: powf_f32); -+ impl_binary!(f32x16[f32; 16]: powf_f32); -+ -+ impl_binary!(f64x2[f64; 2]: powf_f64); -+ impl_binary!(f64x4[f64; 4]: powf_f64); -+ impl_binary!(f64x8[f64; 8]: powf_f64); -+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { -+ use sleef_sys::*; -+ cfg_if! 
{ -+ if #[cfg(target_feature = "avx2")] { -+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10avx2128); -+ impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx2); -+ impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx2); -+ -+ impl_binary!(f32x4: Sleef_powf4_u10avx2128); -+ impl_binary!(f32x8: Sleef_powf8_u10avx2); -+ impl_binary!(f64x2: Sleef_powd2_u10avx2128); -+ impl_binary!(f64x4: Sleef_powd4_u10avx2); -+ } else if #[cfg(target_feature = "avx")] { -+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4); -+ impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx); -+ impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx); -+ -+ impl_binary!(f32x4: Sleef_powf4_u10sse4); -+ impl_binary!(f32x8: Sleef_powf8_u10avx); -+ impl_binary!(f64x2: Sleef_powd2_u10sse4); -+ impl_binary!(f64x4: Sleef_powd4_u10avx); -+ } else if #[cfg(target_feature = "sse4.2")] { -+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4); -+ impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse4); -+ impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse4); -+ -+ impl_binary!(f32x4: Sleef_powf4_u10sse4); -+ impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse4); -+ impl_binary!(f64x2: Sleef_powd2_u10sse4); -+ impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse4); -+ } else if #[cfg(target_feature = "sse2")] { -+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse2); -+ impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse2); -+ impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse2); -+ -+ impl_binary!(f32x4: Sleef_powf4_u10sse2); -+ impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse2); -+ impl_binary!(f64x2: Sleef_powd2_u10sse2); -+ impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse2); -+ } else { -+ impl_binary!(f32x2[f32; 2]: powf_f32); -+ impl_binary!(f32x4: powf_v4f32); -+ impl_binary!(f32x8: powf_v8f32); -+ impl_binary!(f32x16: powf_v16f32); -+ -+ impl_binary!(f64x2: powf_v2f64); -+ impl_binary!(f64x4: powf_v4f64); -+ impl_binary!(f64x8: powf_v8f64); -+ } -+ } -+ } else { -+ impl_binary!(f32x2[f32; 2]: powf_f32); -+ 
impl_binary!(f32x4: powf_v4f32); -+ impl_binary!(f32x8: powf_v8f32); -+ impl_binary!(f32x16: powf_v16f32); -+ -+ impl_binary!(f64x2: powf_v2f64); -+ impl_binary!(f64x4: powf_v4f64); -+ impl_binary!(f64x8: powf_v8f64); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin.rs -new file mode 100644 -index 000000000000..7b014d07da8d ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/sin.rs -@@ -0,0 +1,103 @@ -+//! Vertical floating-point `sin` -+#![allow(unused)] -+ -+// FIXME 64-bit 1 elem vectors sin -+ -+use crate::*; -+ -+crate trait Sin { -+ fn sin(self) -> Self; -+} -+ -+#[allow(improper_ctypes)] -+extern "C" { -+ #[link_name = "llvm.sin.v2f32"] -+ fn sin_v2f32(x: f32x2) -> f32x2; -+ #[link_name = "llvm.sin.v4f32"] -+ fn sin_v4f32(x: f32x4) -> f32x4; -+ #[link_name = "llvm.sin.v8f32"] -+ fn sin_v8f32(x: f32x8) -> f32x8; -+ #[link_name = "llvm.sin.v16f32"] -+ fn sin_v16f32(x: f32x16) -> f32x16; -+ /* FIXME 64-bit single elem vectors -+ #[link_name = "llvm.sin.v1f64"] -+ fn sin_v1f64(x: f64x1) -> f64x1; -+ */ -+ #[link_name = "llvm.sin.v2f64"] -+ fn sin_v2f64(x: f64x2) -> f64x2; -+ #[link_name = "llvm.sin.v4f64"] -+ fn sin_v4f64(x: f64x4) -> f64x4; -+ #[link_name = "llvm.sin.v8f64"] -+ fn sin_v8f64(x: f64x8) -> f64x8; -+ -+ #[link_name = "llvm.sin.f32"] -+ fn sin_f32(x: f32) -> f32; -+ #[link_name = "llvm.sin.f64"] -+ fn sin_f64(x: f64) -> f64; -+} -+ -+gen_unary_impl_table!(Sin, sin); -+ -+cfg_if! 
{ -+ if #[cfg(target_arch = "s390x")] { -+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 -+ impl_unary!(f32x2[f32; 2]: sin_f32); -+ impl_unary!(f32x4[f32; 4]: sin_f32); -+ impl_unary!(f32x8[f32; 8]: sin_f32); -+ impl_unary!(f32x16[f32; 16]: sin_f32); -+ -+ impl_unary!(f64x2[f64; 2]: sin_f64); -+ impl_unary!(f64x4[f64; 4]: sin_f64); -+ impl_unary!(f64x8[f64; 8]: sin_f64); -+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { -+ use sleef_sys::*; -+ cfg_if! { -+ if #[cfg(target_feature = "avx2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10avx2128); -+ impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx2); -+ impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx2); -+ -+ impl_unary!(f32x4: Sleef_sinf4_u10avx2128); -+ impl_unary!(f32x8: Sleef_sinf8_u10avx2); -+ impl_unary!(f64x2: Sleef_sind2_u10avx2128); -+ impl_unary!(f64x4: Sleef_sind4_u10avx2); -+ } else if #[cfg(target_feature = "avx")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4); -+ impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx); -+ impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx); -+ -+ impl_unary!(f32x4: Sleef_sinf4_u10sse4); -+ impl_unary!(f32x8: Sleef_sinf8_u10avx); -+ impl_unary!(f64x2: Sleef_sind2_u10sse4); -+ impl_unary!(f64x4: Sleef_sind4_u10avx); -+ } else if #[cfg(target_feature = "sse4.2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4); -+ impl_unary!(f32x16[q => f32x4]: Sleef_sinf4_u10sse4); -+ impl_unary!(f64x8[q => f64x2]: Sleef_sind2_u10sse4); -+ -+ impl_unary!(f32x4: Sleef_sinf4_u10sse4); -+ impl_unary!(f32x8[h => f32x4]: Sleef_sinf4_u10sse4); -+ impl_unary!(f64x2: Sleef_sind2_u10sse4); -+ impl_unary!(f64x4[h => f64x2]: Sleef_sind2_u10sse4); -+ } else { -+ impl_unary!(f32x2[f32; 2]: sin_f32); -+ impl_unary!(f32x16: sin_v16f32); -+ impl_unary!(f64x8: sin_v8f64); -+ -+ impl_unary!(f32x4: sin_v4f32); -+ impl_unary!(f32x8: sin_v8f32); -+ impl_unary!(f64x2: sin_v2f64); -+ impl_unary!(f64x4: sin_v4f64); -+ } -+ } -+ } else { -+ 
impl_unary!(f32x2[f32; 2]: sin_f32); -+ impl_unary!(f32x4: sin_v4f32); -+ impl_unary!(f32x8: sin_v8f32); -+ impl_unary!(f32x16: sin_v16f32); -+ -+ impl_unary!(f64x2: sin_v2f64); -+ impl_unary!(f64x4: sin_v4f64); -+ impl_unary!(f64x8: sin_v8f64); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs -new file mode 100644 -index 000000000000..0f1249ec88f0 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs -@@ -0,0 +1,195 @@ -+//! Vertical floating-point `sin_cos` -+#![allow(unused)] -+ -+// FIXME 64-bit 1 elem vectors sin_cos -+ -+use crate::*; -+ -+crate trait SinCosPi: Sized { -+ type Output; -+ fn sin_cos_pi(self) -> Self::Output; -+} -+ -+macro_rules! impl_def { -+ ($vid:ident, $PI:path) => { -+ impl SinCosPi for $vid { -+ type Output = (Self, Self); -+ #[inline] -+ fn sin_cos_pi(self) -> Self::Output { -+ let v = self * Self::splat($PI); -+ (v.sin(), v.cos()) -+ } -+ } -+ }; -+} -+ -+macro_rules! impl_def32 { -+ ($vid:ident) => { -+ impl_def!($vid, crate::f32::consts::PI); -+ }; -+} -+macro_rules! impl_def64 { -+ ($vid:ident) => { -+ impl_def!($vid, crate::f64::consts::PI); -+ }; -+} -+ -+macro_rules! 
impl_unary_t { -+ ($vid:ident: $fun:ident) => { -+ impl SinCosPi for $vid { -+ type Output = (Self, Self); -+ fn sin_cos_pi(self) -> Self::Output { -+ unsafe { -+ use crate::mem::transmute; -+ transmute($fun(transmute(self))) -+ } -+ } -+ } -+ }; -+ ($vid:ident[t => $vid_t:ident]: $fun:ident) => { -+ impl SinCosPi for $vid { -+ type Output = (Self, Self); -+ fn sin_cos_pi(self) -> Self::Output { -+ unsafe { -+ use crate::mem::{transmute, uninitialized}; -+ -+ union U { -+ vec: [$vid; 2], -+ twice: $vid_t, -+ } -+ -+ let twice = U { vec: [self, uninitialized()] }.twice; -+ let twice = transmute($fun(transmute(twice))); -+ -+ union R { -+ twice: ($vid_t, $vid_t), -+ vecs: ([$vid; 2], [$vid; 2]), -+ } -+ let r = R { twice }.vecs; -+ (*r.0.get_unchecked(0), *r.0.get_unchecked(1)) -+ } -+ } -+ } -+ }; -+ ($vid:ident[h => $vid_h:ident]: $fun:ident) => { -+ impl SinCosPi for $vid { -+ type Output = (Self, Self); -+ fn sin_cos_pi(self) -> Self::Output { -+ unsafe { -+ use crate::mem::transmute; -+ -+ union U { -+ vec: $vid, -+ halves: [$vid_h; 2], -+ } -+ -+ let halves = U { vec: self }.halves; -+ -+ let res_0: ($vid_h, $vid_h) = -+ transmute($fun(transmute(*halves.get_unchecked(0)))); -+ let res_1: ($vid_h, $vid_h) = -+ transmute($fun(transmute(*halves.get_unchecked(1)))); -+ -+ union R { -+ result: ($vid, $vid), -+ halves: ([$vid_h; 2], [$vid_h; 2]), -+ } -+ R { halves: ([res_0.0, res_1.0], [res_0.1, res_1.1]) } -+ .result -+ } -+ } -+ } -+ }; -+ ($vid:ident[q => $vid_q:ident]: $fun:ident) => { -+ impl SinCosPi for $vid { -+ type Output = (Self, Self); -+ fn sin_cos_pi(self) -> Self::Output { -+ unsafe { -+ use crate::mem::transmute; -+ -+ union U { -+ vec: $vid, -+ quarters: [$vid_q; 4], -+ } -+ -+ let quarters = U { vec: self }.quarters; -+ -+ let res_0: ($vid_q, $vid_q) = -+ transmute($fun(transmute(*quarters.get_unchecked(0)))); -+ let res_1: ($vid_q, $vid_q) = -+ transmute($fun(transmute(*quarters.get_unchecked(1)))); -+ let res_2: ($vid_q, $vid_q) = -+ 
transmute($fun(transmute(*quarters.get_unchecked(2)))); -+ let res_3: ($vid_q, $vid_q) = -+ transmute($fun(transmute(*quarters.get_unchecked(3)))); -+ -+ union R { -+ result: ($vid, $vid), -+ quarters: ([$vid_q; 4], [$vid_q; 4]), -+ } -+ R { -+ quarters: ( -+ [res_0.0, res_1.0, res_2.0, res_3.0], -+ [res_0.1, res_1.1, res_2.1, res_3.1], -+ ), -+ } -+ .result -+ } -+ } -+ } -+ }; -+} -+ -+cfg_if! { -+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { -+ use sleef_sys::*; -+ cfg_if! { -+ if #[cfg(target_feature = "avx2")] { -+ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128); -+ impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2); -+ impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2); -+ -+ impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128); -+ impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2); -+ impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128); -+ impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2); -+ } else if #[cfg(target_feature = "avx")] { -+ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4); -+ impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx); -+ impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx); -+ -+ impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4); -+ impl_unary_t!(f32x8: Sleef_sincospif8_u05avx); -+ impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4); -+ impl_unary_t!(f64x4: Sleef_sincospid4_u05avx); -+ } else if #[cfg(target_feature = "sse4.2")] { -+ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4); -+ impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4); -+ impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4); -+ -+ impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4); -+ impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4); -+ impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4); -+ impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4); -+ } else { -+ impl_def32!(f32x2); -+ impl_def32!(f32x4); -+ impl_def32!(f32x8); -+ impl_def32!(f32x16); -+ -+ impl_def64!(f64x2); -+ 
impl_def64!(f64x4); -+ impl_def64!(f64x8); -+ } -+ } -+ } else { -+ impl_def32!(f32x2); -+ impl_def32!(f32x4); -+ impl_def32!(f32x8); -+ impl_def32!(f32x16); -+ -+ impl_def64!(f64x2); -+ impl_def64!(f64x4); -+ impl_def64!(f64x8); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs -new file mode 100644 -index 000000000000..72df98c93c91 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs -@@ -0,0 +1,87 @@ -+//! Vertical floating-point `sin_pi` -+#![allow(unused)] -+ -+// FIXME 64-bit 1 elem vectors sin_pi -+ -+use crate::*; -+ -+crate trait SinPi { -+ fn sin_pi(self) -> Self; -+} -+ -+gen_unary_impl_table!(SinPi, sin_pi); -+ -+macro_rules! impl_def { -+ ($vid:ident, $PI:path) => { -+ impl SinPi for $vid { -+ #[inline] -+ fn sin_pi(self) -> Self { -+ (self * Self::splat($PI)).sin() -+ } -+ } -+ }; -+} -+macro_rules! impl_def32 { -+ ($vid:ident) => { -+ impl_def!($vid, crate::f32::consts::PI); -+ }; -+} -+macro_rules! impl_def64 { -+ ($vid:ident) => { -+ impl_def!($vid, crate::f64::consts::PI); -+ }; -+} -+ -+cfg_if! { -+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { -+ use sleef_sys::*; -+ cfg_if! 
{ -+ if #[cfg(target_feature = "avx2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05avx2128); -+ impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx2); -+ impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx2); -+ -+ impl_unary!(f32x4: Sleef_sinpif4_u05avx2128); -+ impl_unary!(f32x8: Sleef_sinpif8_u05avx2); -+ impl_unary!(f64x2: Sleef_sinpid2_u05avx2128); -+ impl_unary!(f64x4: Sleef_sinpid4_u05avx2); -+ } else if #[cfg(target_feature = "avx")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4); -+ impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx); -+ impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx); -+ -+ impl_unary!(f32x4: Sleef_sinpif4_u05sse4); -+ impl_unary!(f32x8: Sleef_sinpif8_u05avx); -+ impl_unary!(f64x2: Sleef_sinpid2_u05sse4); -+ impl_unary!(f64x4: Sleef_sinpid4_u05avx); -+ } else if #[cfg(target_feature = "sse4.2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4); -+ impl_unary!(f32x16[q => f32x4]: Sleef_sinpif4_u05sse4); -+ impl_unary!(f64x8[q => f64x2]: Sleef_sinpid2_u05sse4); -+ -+ impl_unary!(f32x4: Sleef_sinpif4_u05sse4); -+ impl_unary!(f32x8[h => f32x4]: Sleef_sinpif4_u05sse4); -+ impl_unary!(f64x2: Sleef_sinpid2_u05sse4); -+ impl_unary!(f64x4[h => f64x2]: Sleef_sinpid2_u05sse4); -+ } else { -+ impl_def32!(f32x2); -+ impl_def32!(f32x4); -+ impl_def32!(f32x8); -+ impl_def32!(f32x16); -+ -+ impl_def64!(f64x2); -+ impl_def64!(f64x4); -+ impl_def64!(f64x8); -+ } -+ } -+ } else { -+ impl_def32!(f32x2); -+ impl_def32!(f32x4); -+ impl_def32!(f32x8); -+ impl_def32!(f32x16); -+ -+ impl_def64!(f64x2); -+ impl_def64!(f64x4); -+ impl_def64!(f64x8); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs b/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs -new file mode 100644 -index 000000000000..7ce31df62662 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs -@@ -0,0 +1,103 @@ -+//! 
Vertical floating-point `sqrt` -+#![allow(unused)] -+ -+// FIXME 64-bit 1 elem vectors sqrt -+ -+use crate::*; -+ -+crate trait Sqrt { -+ fn sqrt(self) -> Self; -+} -+ -+#[allow(improper_ctypes)] -+extern "C" { -+ #[link_name = "llvm.sqrt.v2f32"] -+ fn sqrt_v2f32(x: f32x2) -> f32x2; -+ #[link_name = "llvm.sqrt.v4f32"] -+ fn sqrt_v4f32(x: f32x4) -> f32x4; -+ #[link_name = "llvm.sqrt.v8f32"] -+ fn sqrt_v8f32(x: f32x8) -> f32x8; -+ #[link_name = "llvm.sqrt.v16f32"] -+ fn sqrt_v16f32(x: f32x16) -> f32x16; -+ /* FIXME 64-bit sqrtgle elem vectors -+ #[link_name = "llvm.sqrt.v1f64"] -+ fn sqrt_v1f64(x: f64x1) -> f64x1; -+ */ -+ #[link_name = "llvm.sqrt.v2f64"] -+ fn sqrt_v2f64(x: f64x2) -> f64x2; -+ #[link_name = "llvm.sqrt.v4f64"] -+ fn sqrt_v4f64(x: f64x4) -> f64x4; -+ #[link_name = "llvm.sqrt.v8f64"] -+ fn sqrt_v8f64(x: f64x8) -> f64x8; -+ -+ #[link_name = "llvm.sqrt.f32"] -+ fn sqrt_f32(x: f32) -> f32; -+ #[link_name = "llvm.sqrt.f64"] -+ fn sqrt_f64(x: f64) -> f64; -+} -+ -+gen_unary_impl_table!(Sqrt, sqrt); -+ -+cfg_if! { -+ if #[cfg(target_arch = "s390x")] { -+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 -+ impl_unary!(f32x2[f32; 2]: sqrt_f32); -+ impl_unary!(f32x4[f32; 4]: sqrt_f32); -+ impl_unary!(f32x8[f32; 8]: sqrt_f32); -+ impl_unary!(f32x16[f32; 16]: sqrt_f32); -+ -+ impl_unary!(f64x2[f64; 2]: sqrt_f64); -+ impl_unary!(f64x4[f64; 4]: sqrt_f64); -+ impl_unary!(f64x8[f64; 8]: sqrt_f64); -+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { -+ use sleef_sys::*; -+ cfg_if! 
{ -+ if #[cfg(target_feature = "avx2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_avx2128); -+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx2); -+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx2); -+ -+ impl_unary!(f32x4: Sleef_sqrtf4_avx2128); -+ impl_unary!(f32x8: Sleef_sqrtf8_avx2); -+ impl_unary!(f64x2: Sleef_sqrtd2_avx2128); -+ impl_unary!(f64x4: Sleef_sqrtd4_avx2); -+ } else if #[cfg(target_feature = "avx")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4); -+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx); -+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx); -+ -+ impl_unary!(f32x4: Sleef_sqrtf4_sse4); -+ impl_unary!(f32x8: Sleef_sqrtf8_avx); -+ impl_unary!(f64x2: Sleef_sqrtd2_sse4); -+ impl_unary!(f64x4: Sleef_sqrtd4_avx); -+ } else if #[cfg(target_feature = "sse4.2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4); -+ impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_sse4); -+ impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_sse4); -+ -+ impl_unary!(f32x4: Sleef_sqrtf4_sse4); -+ impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_sse4); -+ impl_unary!(f64x2: Sleef_sqrtd2_sse4); -+ impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_sse4); -+ } else { -+ impl_unary!(f32x2[f32; 2]: sqrt_f32); -+ impl_unary!(f32x16: sqrt_v16f32); -+ impl_unary!(f64x8: sqrt_v8f64); -+ -+ impl_unary!(f32x4: sqrt_v4f32); -+ impl_unary!(f32x8: sqrt_v8f32); -+ impl_unary!(f64x2: sqrt_v2f64); -+ impl_unary!(f64x4: sqrt_v4f64); -+ } -+ } -+ } else { -+ impl_unary!(f32x2[f32; 2]: sqrt_f32); -+ impl_unary!(f32x4: sqrt_v4f32); -+ impl_unary!(f32x8: sqrt_v8f32); -+ impl_unary!(f32x16: sqrt_v16f32); -+ -+ impl_unary!(f64x2: sqrt_v2f64); -+ impl_unary!(f64x4: sqrt_v4f64); -+ impl_unary!(f64x8: sqrt_v8f64); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs b/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs -new file mode 100644 -index 000000000000..c1e379c34241 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs 
-@@ -0,0 +1,67 @@ -+//! Vertical floating-point `sqrt` -+#![allow(unused)] -+ -+// FIXME 64-bit 1 elem vectors sqrte -+ -+use crate::llvm::simd_fsqrt; -+use crate::*; -+ -+crate trait Sqrte { -+ fn sqrte(self) -> Self; -+} -+ -+gen_unary_impl_table!(Sqrte, sqrte); -+ -+cfg_if! { -+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { -+ use sleef_sys::*; -+ cfg_if! { -+ if #[cfg(target_feature = "avx2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35avx2128); -+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx2); -+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx2); -+ -+ impl_unary!(f32x4: Sleef_sqrtf4_u35avx2128); -+ impl_unary!(f32x8: Sleef_sqrtf8_u35avx2); -+ impl_unary!(f64x2: Sleef_sqrtd2_u35avx2128); -+ impl_unary!(f64x4: Sleef_sqrtd4_u35avx2); -+ } else if #[cfg(target_feature = "avx")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4); -+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx); -+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx); -+ -+ impl_unary!(f32x4: Sleef_sqrtf4_u35sse4); -+ impl_unary!(f32x8: Sleef_sqrtf8_u35avx); -+ impl_unary!(f64x2: Sleef_sqrtd2_u35sse4); -+ impl_unary!(f64x4: Sleef_sqrtd4_u35avx); -+ } else if #[cfg(target_feature = "sse4.2")] { -+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4); -+ impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_u35sse4); -+ impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_u35sse4); -+ -+ impl_unary!(f32x4: Sleef_sqrtf4_u35sse4); -+ impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_u35sse4); -+ impl_unary!(f64x2: Sleef_sqrtd2_u35sse4); -+ impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_u35sse4); -+ } else { -+ impl_unary!(f32x2[g]: simd_fsqrt); -+ impl_unary!(f32x16[g]: simd_fsqrt); -+ impl_unary!(f64x8[g]: simd_fsqrt); -+ -+ impl_unary!(f32x4[g]: simd_fsqrt); -+ impl_unary!(f32x8[g]: simd_fsqrt); -+ impl_unary!(f64x2[g]: simd_fsqrt); -+ impl_unary!(f64x4[g]: simd_fsqrt); -+ } -+ } -+ } else { -+ impl_unary!(f32x2[g]: simd_fsqrt); -+ impl_unary!(f32x4[g]: simd_fsqrt); -+ 
impl_unary!(f32x8[g]: simd_fsqrt); -+ impl_unary!(f32x16[g]: simd_fsqrt); -+ -+ impl_unary!(f64x2[g]: simd_fsqrt); -+ impl_unary!(f64x4[g]: simd_fsqrt); -+ impl_unary!(f64x8[g]: simd_fsqrt); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs b/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs -new file mode 100644 -index 000000000000..39f493d3b17f ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs -@@ -0,0 +1,28 @@ -+//! Provides `isize` and `usize` -+ -+use cfg_if::cfg_if; -+ -+cfg_if! { -+ if #[cfg(target_pointer_width = "8")] { -+ crate type isize_ = i8; -+ crate type usize_ = u8; -+ } else if #[cfg(target_pointer_width = "16")] { -+ crate type isize_ = i16; -+ crate type usize_ = u16; -+ } else if #[cfg(target_pointer_width = "32")] { -+ crate type isize_ = i32; -+ crate type usize_ = u32; -+ -+ } else if #[cfg(target_pointer_width = "64")] { -+ crate type isize_ = i64; -+ crate type usize_ = u64; -+ } else if #[cfg(target_pointer_width = "64")] { -+ crate type isize_ = i64; -+ crate type usize_ = u64; -+ } else if #[cfg(target_pointer_width = "128")] { -+ crate type isize_ = i128; -+ crate type usize_ = u128; -+ } else { -+ compile_error!("unsupported target_pointer_width"); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/reductions.rs b/third_party/rust/packed_simd/src/codegen/reductions.rs -new file mode 100644 -index 000000000000..7be4f5fabbea ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/reductions.rs -@@ -0,0 +1 @@ -+crate mod mask; -diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs -new file mode 100644 -index 000000000000..97260c6d4e03 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs -@@ -0,0 +1,69 @@ -+//! Code generation workaround for `all()` mask horizontal reduction. -+//! -+//! Works arround [LLVM bug 36702]. -+//! -+//! 
[LLVM bug 36702]: https://bugs.llvm.org/show_bug.cgi?id=36702 -+#![allow(unused_macros)] -+ -+use crate::*; -+ -+crate trait All: crate::marker::Sized { -+ unsafe fn all(self) -> bool; -+} -+ -+crate trait Any: crate::marker::Sized { -+ unsafe fn any(self) -> bool; -+} -+ -+#[macro_use] -+mod fallback_impl; -+ -+cfg_if! { -+ if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { -+ #[macro_use] -+ mod x86; -+ } else if #[cfg(all(target_arch = "arm", target_feature = "v7", -+ target_feature = "neon", -+ any(feature = "core_arch", libcore_neon)))] { -+ #[macro_use] -+ mod arm; -+ } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { -+ #[macro_use] -+ mod aarch64; -+ } else { -+ #[macro_use] -+ mod fallback; -+ } -+} -+ -+impl_mask_reductions!(m8x2); -+impl_mask_reductions!(m8x4); -+impl_mask_reductions!(m8x8); -+impl_mask_reductions!(m8x16); -+impl_mask_reductions!(m8x32); -+impl_mask_reductions!(m8x64); -+ -+impl_mask_reductions!(m16x2); -+impl_mask_reductions!(m16x4); -+impl_mask_reductions!(m16x8); -+impl_mask_reductions!(m16x16); -+impl_mask_reductions!(m16x32); -+ -+impl_mask_reductions!(m32x2); -+impl_mask_reductions!(m32x4); -+impl_mask_reductions!(m32x8); -+impl_mask_reductions!(m32x16); -+ -+// FIXME: 64-bit single element vector -+// impl_mask_reductions!(m64x1); -+impl_mask_reductions!(m64x2); -+impl_mask_reductions!(m64x4); -+impl_mask_reductions!(m64x8); -+ -+impl_mask_reductions!(m128x1); -+impl_mask_reductions!(m128x2); -+impl_mask_reductions!(m128x4); -+ -+impl_mask_reductions!(msizex2); -+impl_mask_reductions!(msizex4); -+impl_mask_reductions!(msizex8); -diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs -new file mode 100644 -index 000000000000..e9586eace1ff ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs -@@ -0,0 +1,71 @@ -+//! 
Mask reductions implementation for `aarch64` targets -+ -+/// 128-bit wide vectors -+macro_rules! aarch64_128_neon_impl { -+ ($id:ident, $vmin:ident, $vmax:ident) => { -+ impl All for $id { -+ #[inline] -+ #[target_feature(enable = "neon")] -+ unsafe fn all(self) -> bool { -+ use crate::arch::aarch64::$vmin; -+ $vmin(crate::mem::transmute(self)) != 0 -+ } -+ } -+ impl Any for $id { -+ #[inline] -+ #[target_feature(enable = "neon")] -+ unsafe fn any(self) -> bool { -+ use crate::arch::aarch64::$vmax; -+ $vmax(crate::mem::transmute(self)) != 0 -+ } -+ } -+ } -+} -+ -+/// 64-bit wide vectors -+macro_rules! aarch64_64_neon_impl { -+ ($id:ident, $vec128:ident) => { -+ impl All for $id { -+ #[inline] -+ #[target_feature(enable = "neon")] -+ unsafe fn all(self) -> bool { -+ // Duplicates the 64-bit vector into a 128-bit one and -+ // calls all on that. -+ union U { -+ halves: ($id, $id), -+ vec: $vec128, -+ } -+ U { -+ halves: (self, self), -+ }.vec.all() -+ } -+ } -+ impl Any for $id { -+ #[inline] -+ #[target_feature(enable = "neon")] -+ unsafe fn any(self) -> bool { -+ union U { -+ halves: ($id, $id), -+ vec: $vec128, -+ } -+ U { -+ halves: (self, self), -+ }.vec.any() -+ } -+ } -+ }; -+} -+ -+/// Mask reduction implementation for `aarch64` targets -+macro_rules! 
impl_mask_reductions { -+ // 64-bit wide masks -+ (m8x8) => { aarch64_64_neon_impl!(m8x8, m8x16); }; -+ (m16x4) => { aarch64_64_neon_impl!(m16x4, m16x8); }; -+ (m32x2) => { aarch64_64_neon_impl!(m32x2, m32x4); }; -+ // 128-bit wide masks -+ (m8x16) => { aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8); }; -+ (m16x8) => { aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16); }; -+ (m32x4) => { aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32); }; -+ // Fallback to LLVM's default code-generation: -+ ($id:ident) => { fallback_impl!($id); }; -+} -diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs -new file mode 100644 -index 000000000000..1987af7a9676 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs -@@ -0,0 +1,54 @@ -+//! Mask reductions implementation for `arm` targets -+ -+/// Implementation for ARM + v7 + NEON for 64-bit or 128-bit wide vectors with -+/// more than two elements. -+macro_rules! arm_128_v7_neon_impl { -+ ($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => { -+ impl All for $id { -+ #[inline] -+ #[target_feature(enable = "v7,neon")] -+ unsafe fn all(self) -> bool { -+ use crate::arch::arm::$vpmin; -+ use crate::mem::transmute; -+ union U { -+ halves: ($half, $half), -+ vec: $id, -+ } -+ let halves = U { vec: self }.halves; -+ let h: $half = transmute($vpmin( -+ transmute(halves.0), -+ transmute(halves.1), -+ )); -+ h.all() -+ } -+ } -+ impl Any for $id { -+ #[inline] -+ #[target_feature(enable = "v7,neon")] -+ unsafe fn any(self) -> bool { -+ use crate::arch::arm::$vpmax; -+ use crate::mem::transmute; -+ union U { -+ halves: ($half, $half), -+ vec: $id, -+ } -+ let halves = U { vec: self }.halves; -+ let h: $half = transmute($vpmax( -+ transmute(halves.0), -+ transmute(halves.1), -+ )); -+ h.any() -+ } -+ } -+ }; -+} -+ -+/// Mask reduction implementation for `arm` targets -+macro_rules! 
impl_mask_reductions { -+ // 128-bit wide masks -+ (m8x16) => { arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); }; -+ (m16x8) => { arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); }; -+ (m32x4) => { arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32); }; -+ // Fallback to LLVM's default code-generation: -+ ($id:ident) => { fallback_impl!($id); }; -+} -diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs -new file mode 100644 -index 000000000000..25e5c813abca ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs -@@ -0,0 +1,6 @@ -+//! Default mask reduction implementations. -+ -+/// Default mask reduction implementation -+macro_rules! impl_mask_reductions { -+ ($id:ident) => { fallback_impl!($id); }; -+} -diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs -new file mode 100644 -index 000000000000..0d246e2fdab6 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs -@@ -0,0 +1,237 @@ -+//! Default implementation of a mask reduction for any target. -+ -+macro_rules! fallback_to_other_impl { -+ ($id:ident, $other:ident) => { -+ impl All for $id { -+ #[inline] -+ unsafe fn all(self) -> bool { -+ let m: $other = crate::mem::transmute(self); -+ m.all() -+ } -+ } -+ impl Any for $id { -+ #[inline] -+ unsafe fn any(self) -> bool { -+ let m: $other = crate::mem::transmute(self); -+ m.any() -+ } -+ } -+ }; -+} -+ -+/// Fallback implementation. -+macro_rules! 
fallback_impl { -+ // 16-bit wide masks: -+ (m8x2) => { -+ impl All for m8x2 { -+ #[inline] -+ unsafe fn all(self) -> bool { -+ let i: u16 = crate::mem::transmute(self); -+ i == u16::max_value() -+ } -+ } -+ impl Any for m8x2 { -+ #[inline] -+ unsafe fn any(self) -> bool { -+ let i: u16 = crate::mem::transmute(self); -+ i != 0 -+ } -+ } -+ }; -+ // 32-bit wide masks -+ (m8x4) => { -+ impl All for m8x4 { -+ #[inline] -+ unsafe fn all(self) -> bool { -+ let i: u32 = crate::mem::transmute(self); -+ i == u32::max_value() -+ } -+ } -+ impl Any for m8x4 { -+ #[inline] -+ unsafe fn any(self) -> bool { -+ let i: u32 = crate::mem::transmute(self); -+ i != 0 -+ } -+ } -+ }; -+ (m16x2) => { -+ fallback_to_other_impl!(m16x2, m8x4); -+ }; -+ // 64-bit wide masks: -+ (m8x8) => { -+ impl All for m8x8 { -+ #[inline] -+ unsafe fn all(self) -> bool { -+ let i: u64 = crate::mem::transmute(self); -+ i == u64::max_value() -+ } -+ } -+ impl Any for m8x8 { -+ #[inline] -+ unsafe fn any(self) -> bool { -+ let i: u64 = crate::mem::transmute(self); -+ i != 0 -+ } -+ } -+ }; -+ (m16x4) => { -+ fallback_to_other_impl!(m16x4, m8x8); -+ }; -+ (m32x2) => { -+ fallback_to_other_impl!(m32x2, m16x4); -+ }; -+ // FIXME: 64x1 maxk -+ // 128-bit wide masks: -+ (m8x16) => { -+ impl All for m8x16 { -+ #[inline] -+ unsafe fn all(self) -> bool { -+ let i: u128 = crate::mem::transmute(self); -+ i == u128::max_value() -+ } -+ } -+ impl Any for m8x16 { -+ #[inline] -+ unsafe fn any(self) -> bool { -+ let i: u128 = crate::mem::transmute(self); -+ i != 0 -+ } -+ } -+ }; -+ (m16x8) => { -+ fallback_to_other_impl!(m16x8, m8x16); -+ }; -+ (m32x4) => { -+ fallback_to_other_impl!(m32x4, m16x8); -+ }; -+ (m64x2) => { -+ fallback_to_other_impl!(m64x2, m32x4); -+ }; -+ (m128x1) => { -+ fallback_to_other_impl!(m128x1, m64x2); -+ }; -+ // 256-bit wide masks -+ (m8x32) => { -+ impl All for m8x32 { -+ #[inline] -+ unsafe fn all(self) -> bool { -+ let i: [u128; 2] = crate::mem::transmute(self); -+ let o: [u128; 2] = 
[u128::max_value(); 2]; -+ i == o -+ } -+ } -+ impl Any for m8x32 { -+ #[inline] -+ unsafe fn any(self) -> bool { -+ let i: [u128; 2] = crate::mem::transmute(self); -+ let o: [u128; 2] = [0; 2]; -+ i != o -+ } -+ } -+ }; -+ (m16x16) => { -+ fallback_to_other_impl!(m16x16, m8x32); -+ }; -+ (m32x8) => { -+ fallback_to_other_impl!(m32x8, m16x16); -+ }; -+ (m64x4) => { -+ fallback_to_other_impl!(m64x4, m32x8); -+ }; -+ (m128x2) => { -+ fallback_to_other_impl!(m128x2, m64x4); -+ }; -+ // 512-bit wide masks -+ (m8x64) => { -+ impl All for m8x64 { -+ #[inline] -+ unsafe fn all(self) -> bool { -+ let i: [u128; 4] = crate::mem::transmute(self); -+ let o: [u128; 4] = [u128::max_value(); 4]; -+ i == o -+ } -+ } -+ impl Any for m8x64 { -+ #[inline] -+ unsafe fn any(self) -> bool { -+ let i: [u128; 4] = crate::mem::transmute(self); -+ let o: [u128; 4] = [0; 4]; -+ i != o -+ } -+ } -+ }; -+ (m16x32) => { -+ fallback_to_other_impl!(m16x32, m8x64); -+ }; -+ (m32x16) => { -+ fallback_to_other_impl!(m32x16, m16x32); -+ }; -+ (m64x8) => { -+ fallback_to_other_impl!(m64x8, m32x16); -+ }; -+ (m128x4) => { -+ fallback_to_other_impl!(m128x4, m64x8); -+ }; -+ // Masks with pointer-sized elements64 -+ (msizex2) => { -+ cfg_if! { -+ if #[cfg(target_pointer_width = "64")] { -+ fallback_to_other_impl!(msizex2, m64x2); -+ } else if #[cfg(target_pointer_width = "32")] { -+ fallback_to_other_impl!(msizex2, m32x2); -+ } else { -+ compile_error!("unsupported target_pointer_width"); -+ } -+ } -+ }; -+ (msizex4) => { -+ cfg_if! { -+ if #[cfg(target_pointer_width = "64")] { -+ fallback_to_other_impl!(msizex4, m64x4); -+ } else if #[cfg(target_pointer_width = "32")] { -+ fallback_to_other_impl!(msizex4, m32x4); -+ } else { -+ compile_error!("unsupported target_pointer_width"); -+ } -+ } -+ }; -+ (msizex8) => { -+ cfg_if! 
{ -+ if #[cfg(target_pointer_width = "64")] { -+ fallback_to_other_impl!(msizex8, m64x8); -+ } else if #[cfg(target_pointer_width = "32")] { -+ fallback_to_other_impl!(msizex8, m32x8); -+ } else { -+ compile_error!("unsupported target_pointer_width"); -+ } -+ } -+ }; -+} -+ -+macro_rules! recurse_half { -+ ($vid:ident, $vid_h:ident) => { -+ impl All for $vid { -+ #[inline] -+ unsafe fn all(self) -> bool { -+ union U { -+ halves: ($vid_h, $vid_h), -+ vec: $vid, -+ } -+ let halves = U { vec: self }.halves; -+ halves.0.all() && halves.1.all() -+ } -+ } -+ impl Any for $vid { -+ #[inline] -+ unsafe fn any(self) -> bool { -+ union U { -+ halves: ($vid_h, $vid_h), -+ vec: $vid, -+ } -+ let halves = U { vec: self }.halves; -+ halves.0.any() || halves.1.any() -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs -new file mode 100644 -index 000000000000..2ae4ed81c416 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs -@@ -0,0 +1,194 @@ -+//! Mask reductions implementation for `x86` and `x86_64` targets -+ -+#[cfg(target_feature = "sse")] -+#[macro_use] -+mod sse; -+ -+#[cfg(target_feature = "sse2")] -+#[macro_use] -+mod sse2; -+ -+#[cfg(target_feature = "avx")] -+#[macro_use] -+mod avx; -+ -+#[cfg(target_feature = "avx2")] -+#[macro_use] -+mod avx2; -+ -+/// x86 64-bit m8x8 implementation -+macro_rules! x86_m8x8_impl { -+ ($id:ident) => { -+ cfg_if! { -+ if #[cfg(all(target_arch = "x86_64", target_feature = "sse"))] { -+ x86_m8x8_sse_impl!($id); -+ } else { -+ fallback_impl!($id); -+ } -+ } -+ }; -+} -+ -+/// x86 128-bit m8x16 implementation -+macro_rules! x86_m8x16_impl { -+ ($id:ident) => { -+ cfg_if! { -+ if #[cfg(target_feature = "sse2")] { -+ x86_m8x16_sse2_impl!($id); -+ } else { -+ fallback_impl!($id); -+ } -+ } -+ }; -+} -+ -+/// x86 128-bit m32x4 implementation -+macro_rules! 
x86_m32x4_impl { -+ ($id:ident) => { -+ cfg_if! { -+ if #[cfg(target_feature = "sse")] { -+ x86_m32x4_sse_impl!($id); -+ } else { -+ fallback_impl!($id); -+ } -+ } -+ }; -+} -+ -+/// x86 128-bit m64x2 implementation -+macro_rules! x86_m64x2_impl { -+ ($id:ident) => { -+ cfg_if! { -+ if #[cfg(target_feature = "sse2")] { -+ x86_m64x2_sse2_impl!($id); -+ } else if #[cfg(target_feature = "sse")] { -+ x86_m32x4_sse_impl!($id); -+ } else { -+ fallback_impl!($id); -+ } -+ } -+ }; -+} -+ -+/// x86 256-bit m8x32 implementation -+macro_rules! x86_m8x32_impl { -+ ($id:ident, $half_id:ident) => { -+ cfg_if! { -+ if #[cfg(target_feature = "avx2")] { -+ x86_m8x32_avx2_impl!($id); -+ } else if #[cfg(target_feature = "avx")] { -+ x86_m8x32_avx_impl!($id); -+ } else if #[cfg(target_feature = "sse2")] { -+ recurse_half!($id, $half_id); -+ } else { -+ fallback_impl!($id); -+ } -+ } -+ }; -+} -+ -+/// x86 256-bit m32x8 implementation -+macro_rules! x86_m32x8_impl { -+ ($id:ident, $half_id:ident) => { -+ cfg_if! { -+ if #[cfg(target_feature = "avx")] { -+ x86_m32x8_avx_impl!($id); -+ } else if #[cfg(target_feature = "sse")] { -+ recurse_half!($id, $half_id); -+ } else { -+ fallback_impl!($id); -+ } -+ } -+ }; -+} -+ -+/// x86 256-bit m64x4 implementation -+macro_rules! x86_m64x4_impl { -+ ($id:ident, $half_id:ident) => { -+ cfg_if! { -+ if #[cfg(target_feature = "avx")] { -+ x86_m64x4_avx_impl!($id); -+ } else if #[cfg(target_feature = "sse")] { -+ recurse_half!($id, $half_id); -+ } else { -+ fallback_impl!($id); -+ } -+ } -+ }; -+} -+ -+/// Fallback implementation. -+macro_rules! 
x86_intr_impl { -+ ($id:ident) => { -+ impl All for $id { -+ #[inline] -+ unsafe fn all(self) -> bool { -+ use crate::llvm::simd_reduce_all; -+ simd_reduce_all(self.0) -+ } -+ } -+ impl Any for $id { -+ #[inline] -+ unsafe fn any(self) -> bool { -+ use crate::llvm::simd_reduce_any; -+ simd_reduce_any(self.0) -+ } -+ } -+ }; -+} -+ -+/// Mask reduction implementation for `x86` and `x86_64` targets -+macro_rules! impl_mask_reductions { -+ // 64-bit wide masks -+ (m8x8) => { x86_m8x8_impl!(m8x8); }; -+ (m16x4) => { x86_m8x8_impl!(m16x4); }; -+ (m32x2) => { x86_m8x8_impl!(m32x2); }; -+ // 128-bit wide masks -+ (m8x16) => { x86_m8x16_impl!(m8x16); }; -+ (m16x8) => { x86_m8x16_impl!(m16x8); }; -+ (m32x4) => { x86_m32x4_impl!(m32x4); }; -+ (m64x2) => { x86_m64x2_impl!(m64x2); }; -+ (m128x1) => { x86_intr_impl!(m128x1); }; -+ // 256-bit wide masks: -+ (m8x32) => { x86_m8x32_impl!(m8x32, m8x16); }; -+ (m16x16) => { x86_m8x32_impl!(m16x16, m16x8); }; -+ (m32x8) => { x86_m32x8_impl!(m32x8, m32x4); }; -+ (m64x4) => { x86_m64x4_impl!(m64x4, m64x2); }; -+ (m128x2) => { x86_intr_impl!(m128x2); }; -+ (msizex2) => { -+ cfg_if! { -+ if #[cfg(target_pointer_width = "64")] { -+ fallback_to_other_impl!(msizex2, m64x2); -+ } else if #[cfg(target_pointer_width = "32")] { -+ fallback_to_other_impl!(msizex2, m32x2); -+ } else { -+ compile_error!("unsupported target_pointer_width"); -+ } -+ } -+ }; -+ (msizex4) => { -+ cfg_if! { -+ if #[cfg(target_pointer_width = "64")] { -+ fallback_to_other_impl!(msizex4, m64x4); -+ } else if #[cfg(target_pointer_width = "32")] { -+ fallback_to_other_impl!(msizex4, m32x4); -+ } else { -+ compile_error!("unsupported target_pointer_width"); -+ } -+ } -+ }; -+ (msizex8) => { -+ cfg_if! 
{ -+ if #[cfg(target_pointer_width = "64")] { -+ fallback_to_other_impl!(msizex8, m64x8); -+ } else if #[cfg(target_pointer_width = "32")] { -+ fallback_to_other_impl!(msizex8, m32x8); -+ } else { -+ compile_error!("unsupported target_pointer_width"); -+ } -+ } -+ }; -+ -+ // Fallback to LLVM's default code-generation: -+ ($id:ident) => { fallback_impl!($id); }; -+} -diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs -new file mode 100644 -index 000000000000..d18736fb0399 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs -@@ -0,0 +1,101 @@ -+//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX` -+ -+/// `x86`/`x86_64` 256-bit `AVX` implementation -+/// FIXME: it might be faster here to do two `_mm_movmask_epi8` -+#[cfg(target_feature = "avx")] -+macro_rules! x86_m8x32_avx_impl { -+ ($id:ident) => { -+ impl All for $id { -+ #[inline] -+ #[target_feature(enable = "avx")] -+ unsafe fn all(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm256_testc_si256; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm256_testc_si256; -+ _mm256_testc_si256( -+ crate::mem::transmute(self), -+ crate::mem::transmute($id::splat(true)), -+ ) != 0 -+ } -+ } -+ impl Any for $id { -+ #[inline] -+ #[target_feature(enable = "avx")] -+ unsafe fn any(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm256_testz_si256; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm256_testz_si256; -+ _mm256_testz_si256( -+ crate::mem::transmute(self), -+ crate::mem::transmute(self), -+ ) == 0 -+ } -+ } -+ }; -+} -+ -+/// `x86`/`x86_64` 256-bit m32x8 `AVX` implementation -+macro_rules! 
x86_m32x8_avx_impl { -+ ($id:ident) => { -+ impl All for $id { -+ #[inline] -+ #[target_feature(enable = "sse")] -+ unsafe fn all(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm256_movemask_ps; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm256_movemask_ps; -+ // _mm256_movemask_ps(a) creates a 8bit mask containing the -+ // most significant bit of each lane of `a`. If all bits are -+ // set, then all 8 lanes of the mask are true. -+ _mm256_movemask_ps(crate::mem::transmute(self)) == 0b_1111_1111_i32 -+ } -+ } -+ impl Any for $id { -+ #[inline] -+ #[target_feature(enable = "sse")] -+ unsafe fn any(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm256_movemask_ps; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm256_movemask_ps; -+ -+ _mm256_movemask_ps(crate::mem::transmute(self)) != 0 -+ } -+ } -+ }; -+} -+ -+/// `x86`/`x86_64` 256-bit m64x4 `AVX` implementation -+macro_rules! x86_m64x4_avx_impl { -+ ($id:ident) => { -+ impl All for $id { -+ #[inline] -+ #[target_feature(enable = "sse")] -+ unsafe fn all(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm256_movemask_pd; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm256_movemask_pd; -+ // _mm256_movemask_pd(a) creates a 4bit mask containing the -+ // most significant bit of each lane of `a`. If all bits are -+ // set, then all 4 lanes of the mask are true. 
-+ _mm256_movemask_pd(crate::mem::transmute(self)) == 0b_1111_i32 -+ } -+ } -+ impl Any for $id { -+ #[inline] -+ #[target_feature(enable = "sse")] -+ unsafe fn any(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm256_movemask_pd; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm256_movemask_pd; -+ -+ _mm256_movemask_pd(crate::mem::transmute(self)) != 0 -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs -new file mode 100644 -index 000000000000..d37d02342092 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs -@@ -0,0 +1,35 @@ -+//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX2`. -+#![allow(unused)] -+ -+/// x86/x86_64 256-bit m8x32 AVX2 implementation -+macro_rules! x86_m8x32_avx2_impl { -+ ($id:ident) => { -+ impl All for $id { -+ #[inline] -+ #[target_feature(enable = "sse2")] -+ unsafe fn all(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm256_movemask_epi8; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm256_movemask_epi8; -+ // _mm256_movemask_epi8(a) creates a 32bit mask containing the -+ // most significant bit of each byte of `a`. If all -+ // bits are set, then all 32 lanes of the mask are -+ // true. 
-+ _mm256_movemask_epi8(crate::mem::transmute(self)) == -1_i32 -+ } -+ } -+ impl Any for $id { -+ #[inline] -+ #[target_feature(enable = "sse2")] -+ unsafe fn any(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm256_movemask_epi8; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm256_movemask_epi8; -+ -+ _mm256_movemask_epi8(crate::mem::transmute(self)) != 0 -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs -new file mode 100644 -index 000000000000..7482f9430a14 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs -@@ -0,0 +1,68 @@ -+//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE`. -+#![allow(unused)] -+ -+/// `x86`/`x86_64` 128-bit `m32x4` `SSE` implementation -+macro_rules! x86_m32x4_sse_impl { -+ ($id:ident) => { -+ impl All for $id { -+ #[inline] -+ #[target_feature(enable = "sse")] -+ unsafe fn all(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm_movemask_ps; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm_movemask_ps; -+ // _mm_movemask_ps(a) creates a 4bit mask containing the -+ // most significant bit of each lane of `a`. If all -+ // bits are set, then all 4 lanes of the mask are -+ // true. -+ _mm_movemask_ps(crate::mem::transmute(self)) -+ == 0b_1111_i32 -+ } -+ } -+ impl Any for $id { -+ #[inline] -+ #[target_feature(enable = "sse")] -+ unsafe fn any(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm_movemask_ps; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm_movemask_ps; -+ -+ _mm_movemask_ps(crate::mem::transmute(self)) != 0 -+ } -+ } -+ }; -+} -+ -+macro_rules! 
x86_m8x8_sse_impl { -+ ($id:ident) => { -+ impl All for $id { -+ #[inline] -+ #[target_feature(enable = "sse")] -+ unsafe fn all(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm_movemask_pi8; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm_movemask_pi8; -+ // _mm_movemask_pi8(a) creates an 8bit mask containing the most -+ // significant bit of each byte of `a`. If all bits are set, -+ // then all 8 lanes of the mask are true. -+ _mm_movemask_pi8(crate::mem::transmute(self)) -+ == u8::max_value() as i32 -+ } -+ } -+ impl Any for $id { -+ #[inline] -+ #[target_feature(enable = "sse")] -+ unsafe fn any(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm_movemask_pi8; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm_movemask_pi8; -+ -+ _mm_movemask_pi8(crate::mem::transmute(self)) != 0 -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs -new file mode 100644 -index 000000000000..a99c606f5268 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs -@@ -0,0 +1,70 @@ -+//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE2`. -+#![allow(unused)] -+ -+/// `x86`/`x86_64` 128-bit m64x2 `SSE2` implementation -+macro_rules! x86_m64x2_sse2_impl { -+ ($id:ident) => { -+ impl All for $id { -+ #[inline] -+ #[target_feature(enable = "sse")] -+ unsafe fn all(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm_movemask_pd; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm_movemask_pd; -+ // _mm_movemask_pd(a) creates a 2bit mask containing the -+ // most significant bit of each lane of `a`. If all -+ // bits are set, then all 2 lanes of the mask are -+ // true. 
-+ _mm_movemask_pd(crate::mem::transmute(self)) -+ == 0b_11_i32 -+ } -+ } -+ impl Any for $id { -+ #[inline] -+ #[target_feature(enable = "sse")] -+ unsafe fn any(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm_movemask_pd; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm_movemask_pd; -+ -+ _mm_movemask_pd(crate::mem::transmute(self)) != 0 -+ } -+ } -+ }; -+} -+ -+/// `x86`/`x86_64` 128-bit m8x16 `SSE2` implementation -+macro_rules! x86_m8x16_sse2_impl { -+ ($id:ident) => { -+ impl All for $id { -+ #[inline] -+ #[target_feature(enable = "sse2")] -+ unsafe fn all(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm_movemask_epi8; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm_movemask_epi8; -+ // _mm_movemask_epi8(a) creates a 16bit mask containing the -+ // most significant bit of each byte of `a`. If all -+ // bits are set, then all 16 lanes of the mask are -+ // true. -+ _mm_movemask_epi8(crate::mem::transmute(self)) -+ == i32::from(u16::max_value()) -+ } -+ } -+ impl Any for $id { -+ #[inline] -+ #[target_feature(enable = "sse2")] -+ unsafe fn any(self) -> bool { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm_movemask_epi8; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm_movemask_epi8; -+ -+ _mm_movemask_epi8(crate::mem::transmute(self)) != 0 -+ } -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/codegen/shuffle.rs b/third_party/rust/packed_simd/src/codegen/shuffle.rs -new file mode 100644 -index 000000000000..35a9db905339 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/shuffle.rs -@@ -0,0 +1,302 @@ -+//! Implementations of the `ShuffleResult` trait for the different numbers of -+//! lanes and vector element types. 
-+ -+use crate::masks::*; -+use crate::sealed::Shuffle; -+ -+impl Shuffle<[u32; 2]> for i8 { -+ type Output = crate::codegen::i8x2; -+} -+impl Shuffle<[u32; 4]> for i8 { -+ type Output = crate::codegen::i8x4; -+} -+impl Shuffle<[u32; 8]> for i8 { -+ type Output = crate::codegen::i8x8; -+} -+impl Shuffle<[u32; 16]> for i8 { -+ type Output = crate::codegen::i8x16; -+} -+impl Shuffle<[u32; 32]> for i8 { -+ type Output = crate::codegen::i8x32; -+} -+impl Shuffle<[u32; 64]> for i8 { -+ type Output = crate::codegen::i8x64; -+} -+ -+impl Shuffle<[u32; 2]> for u8 { -+ type Output = crate::codegen::u8x2; -+} -+impl Shuffle<[u32; 4]> for u8 { -+ type Output = crate::codegen::u8x4; -+} -+impl Shuffle<[u32; 8]> for u8 { -+ type Output = crate::codegen::u8x8; -+} -+impl Shuffle<[u32; 16]> for u8 { -+ type Output = crate::codegen::u8x16; -+} -+impl Shuffle<[u32; 32]> for u8 { -+ type Output = crate::codegen::u8x32; -+} -+impl Shuffle<[u32; 64]> for u8 { -+ type Output = crate::codegen::u8x64; -+} -+ -+impl Shuffle<[u32; 2]> for m8 { -+ type Output = crate::codegen::m8x2; -+} -+impl Shuffle<[u32; 4]> for m8 { -+ type Output = crate::codegen::m8x4; -+} -+impl Shuffle<[u32; 8]> for m8 { -+ type Output = crate::codegen::m8x8; -+} -+impl Shuffle<[u32; 16]> for m8 { -+ type Output = crate::codegen::m8x16; -+} -+impl Shuffle<[u32; 32]> for m8 { -+ type Output = crate::codegen::m8x32; -+} -+impl Shuffle<[u32; 64]> for m8 { -+ type Output = crate::codegen::m8x64; -+} -+ -+impl Shuffle<[u32; 2]> for i16 { -+ type Output = crate::codegen::i16x2; -+} -+impl Shuffle<[u32; 4]> for i16 { -+ type Output = crate::codegen::i16x4; -+} -+impl Shuffle<[u32; 8]> for i16 { -+ type Output = crate::codegen::i16x8; -+} -+impl Shuffle<[u32; 16]> for i16 { -+ type Output = crate::codegen::i16x16; -+} -+impl Shuffle<[u32; 32]> for i16 { -+ type Output = crate::codegen::i16x32; -+} -+ -+impl Shuffle<[u32; 2]> for u16 { -+ type Output = crate::codegen::u16x2; -+} -+impl Shuffle<[u32; 4]> for u16 { -+ type 
Output = crate::codegen::u16x4; -+} -+impl Shuffle<[u32; 8]> for u16 { -+ type Output = crate::codegen::u16x8; -+} -+impl Shuffle<[u32; 16]> for u16 { -+ type Output = crate::codegen::u16x16; -+} -+impl Shuffle<[u32; 32]> for u16 { -+ type Output = crate::codegen::u16x32; -+} -+ -+impl Shuffle<[u32; 2]> for m16 { -+ type Output = crate::codegen::m16x2; -+} -+impl Shuffle<[u32; 4]> for m16 { -+ type Output = crate::codegen::m16x4; -+} -+impl Shuffle<[u32; 8]> for m16 { -+ type Output = crate::codegen::m16x8; -+} -+impl Shuffle<[u32; 16]> for m16 { -+ type Output = crate::codegen::m16x16; -+} -+impl Shuffle<[u32; 32]> for m16 { -+ type Output = crate::codegen::m16x32; -+} -+ -+impl Shuffle<[u32; 2]> for i32 { -+ type Output = crate::codegen::i32x2; -+} -+impl Shuffle<[u32; 4]> for i32 { -+ type Output = crate::codegen::i32x4; -+} -+impl Shuffle<[u32; 8]> for i32 { -+ type Output = crate::codegen::i32x8; -+} -+impl Shuffle<[u32; 16]> for i32 { -+ type Output = crate::codegen::i32x16; -+} -+ -+impl Shuffle<[u32; 2]> for u32 { -+ type Output = crate::codegen::u32x2; -+} -+impl Shuffle<[u32; 4]> for u32 { -+ type Output = crate::codegen::u32x4; -+} -+impl Shuffle<[u32; 8]> for u32 { -+ type Output = crate::codegen::u32x8; -+} -+impl Shuffle<[u32; 16]> for u32 { -+ type Output = crate::codegen::u32x16; -+} -+ -+impl Shuffle<[u32; 2]> for f32 { -+ type Output = crate::codegen::f32x2; -+} -+impl Shuffle<[u32; 4]> for f32 { -+ type Output = crate::codegen::f32x4; -+} -+impl Shuffle<[u32; 8]> for f32 { -+ type Output = crate::codegen::f32x8; -+} -+impl Shuffle<[u32; 16]> for f32 { -+ type Output = crate::codegen::f32x16; -+} -+ -+impl Shuffle<[u32; 2]> for m32 { -+ type Output = crate::codegen::m32x2; -+} -+impl Shuffle<[u32; 4]> for m32 { -+ type Output = crate::codegen::m32x4; -+} -+impl Shuffle<[u32; 8]> for m32 { -+ type Output = crate::codegen::m32x8; -+} -+impl Shuffle<[u32; 16]> for m32 { -+ type Output = crate::codegen::m32x16; -+} -+ -+/* FIXME: 64-bit single element 
vector -+impl Shuffle<[u32; 1]> for i64 { -+ type Output = crate::codegen::i64x1; -+} -+*/ -+impl Shuffle<[u32; 2]> for i64 { -+ type Output = crate::codegen::i64x2; -+} -+impl Shuffle<[u32; 4]> for i64 { -+ type Output = crate::codegen::i64x4; -+} -+impl Shuffle<[u32; 8]> for i64 { -+ type Output = crate::codegen::i64x8; -+} -+ -+/* FIXME: 64-bit single element vector -+impl Shuffle<[u32; 1]> for u64 { -+ type Output = crate::codegen::u64x1; -+} -+*/ -+impl Shuffle<[u32; 2]> for u64 { -+ type Output = crate::codegen::u64x2; -+} -+impl Shuffle<[u32; 4]> for u64 { -+ type Output = crate::codegen::u64x4; -+} -+impl Shuffle<[u32; 8]> for u64 { -+ type Output = crate::codegen::u64x8; -+} -+ -+/* FIXME: 64-bit single element vector -+impl Shuffle<[u32; 1]> for f64 { -+ type Output = crate::codegen::f64x1; -+} -+*/ -+impl Shuffle<[u32; 2]> for f64 { -+ type Output = crate::codegen::f64x2; -+} -+impl Shuffle<[u32; 4]> for f64 { -+ type Output = crate::codegen::f64x4; -+} -+impl Shuffle<[u32; 8]> for f64 { -+ type Output = crate::codegen::f64x8; -+} -+ -+/* FIXME: 64-bit single element vector -+impl Shuffle<[u32; 1]> for m64 { -+ type Output = crate::codegen::m64x1; -+} -+*/ -+impl Shuffle<[u32; 2]> for m64 { -+ type Output = crate::codegen::m64x2; -+} -+impl Shuffle<[u32; 4]> for m64 { -+ type Output = crate::codegen::m64x4; -+} -+impl Shuffle<[u32; 8]> for m64 { -+ type Output = crate::codegen::m64x8; -+} -+ -+impl Shuffle<[u32; 2]> for isize { -+ type Output = crate::codegen::isizex2; -+} -+impl Shuffle<[u32; 4]> for isize { -+ type Output = crate::codegen::isizex4; -+} -+impl Shuffle<[u32; 8]> for isize { -+ type Output = crate::codegen::isizex8; -+} -+ -+impl Shuffle<[u32; 2]> for usize { -+ type Output = crate::codegen::usizex2; -+} -+impl Shuffle<[u32; 4]> for usize { -+ type Output = crate::codegen::usizex4; -+} -+impl Shuffle<[u32; 8]> for usize { -+ type Output = crate::codegen::usizex8; -+} -+ -+impl Shuffle<[u32; 2]> for *const T { -+ type Output = 
crate::codegen::cptrx2; -+} -+impl Shuffle<[u32; 4]> for *const T { -+ type Output = crate::codegen::cptrx4; -+} -+impl Shuffle<[u32; 8]> for *const T { -+ type Output = crate::codegen::cptrx8; -+} -+ -+impl Shuffle<[u32; 2]> for *mut T { -+ type Output = crate::codegen::mptrx2; -+} -+impl Shuffle<[u32; 4]> for *mut T { -+ type Output = crate::codegen::mptrx4; -+} -+impl Shuffle<[u32; 8]> for *mut T { -+ type Output = crate::codegen::mptrx8; -+} -+ -+impl Shuffle<[u32; 2]> for msize { -+ type Output = crate::codegen::msizex2; -+} -+impl Shuffle<[u32; 4]> for msize { -+ type Output = crate::codegen::msizex4; -+} -+impl Shuffle<[u32; 8]> for msize { -+ type Output = crate::codegen::msizex8; -+} -+ -+impl Shuffle<[u32; 1]> for i128 { -+ type Output = crate::codegen::i128x1; -+} -+impl Shuffle<[u32; 2]> for i128 { -+ type Output = crate::codegen::i128x2; -+} -+impl Shuffle<[u32; 4]> for i128 { -+ type Output = crate::codegen::i128x4; -+} -+ -+impl Shuffle<[u32; 1]> for u128 { -+ type Output = crate::codegen::u128x1; -+} -+impl Shuffle<[u32; 2]> for u128 { -+ type Output = crate::codegen::u128x2; -+} -+impl Shuffle<[u32; 4]> for u128 { -+ type Output = crate::codegen::u128x4; -+} -+ -+impl Shuffle<[u32; 1]> for m128 { -+ type Output = crate::codegen::m128x1; -+} -+impl Shuffle<[u32; 2]> for m128 { -+ type Output = crate::codegen::m128x2; -+} -+impl Shuffle<[u32; 4]> for m128 { -+ type Output = crate::codegen::m128x4; -+} -diff --git a/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs b/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs -new file mode 100644 -index 000000000000..1e9f5816371a ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs -@@ -0,0 +1,432 @@ -+//! Shuffle vector lanes with run-time indices. -+ -+use crate::*; -+ -+pub trait Shuffle1Dyn { -+ type Indices; -+ fn shuffle1_dyn(self, _: Self::Indices) -> Self; -+} -+ -+// Fallback implementation -+macro_rules! 
impl_fallback { -+ ($id:ident) => { -+ impl Shuffle1Dyn for $id { -+ type Indices = Self; -+ #[inline] -+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { -+ let mut result = Self::splat(0); -+ for i in 0..$id::lanes() { -+ result = result -+ .replace(i, self.extract(indices.extract(i) as usize)); -+ } -+ result -+ } -+ } -+ }; -+} -+ -+macro_rules! impl_shuffle1_dyn { -+ (u8x8) => { -+ cfg_if! { -+ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), -+ target_feature = "ssse3"))] { -+ impl Shuffle1Dyn for u8x8 { -+ type Indices = Self; -+ #[inline] -+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm_shuffle_pi8; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm_shuffle_pi8; -+ -+ unsafe { -+ crate::mem::transmute( -+ _mm_shuffle_pi8( -+ crate::mem::transmute(self.0), -+ crate::mem::transmute(indices.0) -+ ) -+ ) -+ } -+ } -+ } -+ } else if #[cfg(all( -+ any( -+ all(target_aarch = "aarch64", target_feature = "neon"), -+ all(target_aarch = "arm", target_feature = "v7", -+ target_feature = "neon") -+ ), -+ any(feature = "core_arch", libcore_neon) -+ ) -+ )] { -+ impl Shuffle1Dyn for u8x8 { -+ type Indices = Self; -+ #[inline] -+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { -+ #[cfg(targt_arch = "aarch64")] -+ use crate::arch::aarch64::vtbl1_u8; -+ #[cfg(targt_arch = "arm")] -+ use crate::arch::arm::vtbl1_u8; -+ -+ // This is safe because the binary is compiled with -+ // neon enabled at compile-time and can therefore only -+ // run on CPUs that have it enabled. -+ unsafe { -+ Simd(mem::transmute( -+ vtbl1_u8(mem::transmute(self.0), -+ crate::mem::transmute(indices.0)) -+ )) -+ } -+ } -+ } -+ } else { -+ impl_fallback!(u8x8); -+ } -+ } -+ }; -+ (u8x16) => { -+ cfg_if! 
{ -+ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), -+ target_feature = "ssse3"))] { -+ impl Shuffle1Dyn for u8x16 { -+ type Indices = Self; -+ #[inline] -+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::_mm_shuffle_epi8; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::_mm_shuffle_epi8; -+ // This is safe because the binary is compiled with -+ // ssse3 enabled at compile-time and can therefore only -+ // run on CPUs that have it enabled. -+ unsafe { -+ Simd(mem::transmute( -+ _mm_shuffle_epi8(mem::transmute(self.0), -+ crate::mem::transmute(indices)) -+ )) -+ } -+ } -+ } -+ } else if #[cfg(all(target_aarch = "aarch64", target_feature = "neon", -+ any(feature = "core_arch", libcore_neon)))] { -+ impl Shuffle1Dyn for u8x16 { -+ type Indices = Self; -+ #[inline] -+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { -+ use crate::arch::aarch64::vqtbl1q_u8; -+ -+ // This is safe because the binary is compiled with -+ // neon enabled at compile-time and can therefore only -+ // run on CPUs that have it enabled. -+ unsafe { -+ Simd(mem::transmute( -+ vqtbl1q_u8(mem::transmute(self.0), -+ crate::mem::transmute(indices.0)) -+ )) -+ } -+ } -+ } -+ } else if #[cfg(all(target_aarch = "arm", target_feature = "v7", -+ target_feature = "neon", -+ any(feature = "core_arch", libcore_neon)))] { -+ impl Shuffle1Dyn for u8x16 { -+ type Indices = Self; -+ #[inline] -+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { -+ use crate::arch::arm::vtbl2_u8; -+ -+ // This is safe because the binary is compiled with -+ // neon enabled at compile-time and can therefore only -+ // run on CPUs that have it enabled. 
-+ unsafe { -+ union U { -+ j: u8x16, -+ s: (u8x8, u8x8), -+ } -+ -+ let (i0, i1) = U { j: y }.s; -+ -+ let r0 = vtbl2_u8( -+ mem::transmute(x), -+ crate::mem::transmute(i0) -+ ); -+ let r1 = vtbl2_u8( -+ mem::transmute(x), -+ crate::mem::transmute(i1) -+ ); -+ -+ let r = U { s: (r0, r1) }.j; -+ -+ Simd(mem::transmute(r)) -+ } -+ } -+ } -+ } else { -+ impl_fallback!(u8x16); -+ } -+ } -+ }; -+ (u16x8) => { -+ impl Shuffle1Dyn for u16x8 { -+ type Indices = Self; -+ #[inline] -+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { -+ let indices: u8x8 = (indices * 2).cast(); -+ let indices: u8x16 = shuffle!( -+ indices, [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7] -+ ); -+ let v = u8x16::new( -+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 -+ ); -+ let indices = indices + v; -+ unsafe { -+ let s: u8x16 =crate::mem::transmute(self); -+ crate::mem::transmute(s.shuffle1_dyn(indices)) -+ } -+ } -+ } -+ }; -+ (u32x4) => { -+ cfg_if! { -+ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), -+ target_feature = "avx"))] { -+ impl Shuffle1Dyn for u32x4 { -+ type Indices = Self; -+ #[inline] -+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::{_mm_permutevar_ps}; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::{_mm_permutevar_ps}; -+ -+ unsafe { -+ crate::mem::transmute( -+ _mm_permutevar_ps( -+ crate::mem::transmute(self.0), -+ crate::mem::transmute(indices.0) -+ ) -+ ) -+ } -+ } -+ } -+ } else { -+ impl Shuffle1Dyn for u32x4 { -+ type Indices = Self; -+ #[inline] -+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { -+ let indices: u8x4 = (indices * 4).cast(); -+ let indices: u8x16 = shuffle!( -+ indices, -+ [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3] -+ ); -+ let v = u8x16::new( -+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 -+ ); -+ let indices = indices + v; -+ unsafe { -+ let s: u8x16 =crate::mem::transmute(self); -+ crate::mem::transmute(s.shuffle1_dyn(indices)) 
-+ } -+ } -+ } -+ } -+ } -+ }; -+ (u64x2) => { -+ cfg_if! { -+ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), -+ target_feature = "avx"))] { -+ impl Shuffle1Dyn for u64x2 { -+ type Indices = Self; -+ #[inline] -+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { -+ #[cfg(target_arch = "x86")] -+ use crate::arch::x86::{_mm_permutevar_pd}; -+ #[cfg(target_arch = "x86_64")] -+ use crate::arch::x86_64::{_mm_permutevar_pd}; -+ // _mm_permutevar_pd uses the _second_ bit of each -+ // element to perform the selection, that is: 0b00 => 0, -+ // 0b10 => 1: -+ let indices = indices << 1; -+ unsafe { -+ crate::mem::transmute( -+ _mm_permutevar_pd( -+ crate::mem::transmute(self), -+ crate::mem::transmute(indices) -+ ) -+ ) -+ } -+ } -+ } -+ } else { -+ impl Shuffle1Dyn for u64x2 { -+ type Indices = Self; -+ #[inline] -+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { -+ let indices: u8x2 = (indices * 8).cast(); -+ let indices: u8x16 = shuffle!( -+ indices, -+ [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] -+ ); -+ let v = u8x16::new( -+ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 -+ ); -+ let indices = indices + v; -+ unsafe { -+ let s: u8x16 =crate::mem::transmute(self); -+ crate::mem::transmute(s.shuffle1_dyn(indices)) -+ } -+ } -+ } -+ } -+ } -+ }; -+ (u128x1) => { -+ impl Shuffle1Dyn for u128x1 { -+ type Indices = Self; -+ #[inline] -+ fn shuffle1_dyn(self, _indices: Self::Indices) -> Self { -+ self -+ } -+ } -+ }; -+ ($id:ident) => { impl_fallback!($id); } -+} -+ -+impl_shuffle1_dyn!(u8x2); -+impl_shuffle1_dyn!(u8x4); -+impl_shuffle1_dyn!(u8x8); -+impl_shuffle1_dyn!(u8x16); -+impl_shuffle1_dyn!(u8x32); -+impl_shuffle1_dyn!(u8x64); -+ -+impl_shuffle1_dyn!(u16x2); -+impl_shuffle1_dyn!(u16x4); -+impl_shuffle1_dyn!(u16x8); -+impl_shuffle1_dyn!(u16x16); -+impl_shuffle1_dyn!(u16x32); -+ -+impl_shuffle1_dyn!(u32x2); -+impl_shuffle1_dyn!(u32x4); -+impl_shuffle1_dyn!(u32x8); -+impl_shuffle1_dyn!(u32x16); -+ -+impl_shuffle1_dyn!(u64x2); 
-+impl_shuffle1_dyn!(u64x4); -+impl_shuffle1_dyn!(u64x8); -+ -+impl_shuffle1_dyn!(usizex2); -+impl_shuffle1_dyn!(usizex4); -+impl_shuffle1_dyn!(usizex8); -+ -+impl_shuffle1_dyn!(u128x1); -+impl_shuffle1_dyn!(u128x2); -+impl_shuffle1_dyn!(u128x4); -+ -+// Implementation for non-unsigned vector types -+macro_rules! impl_shuffle1_dyn_non_u { -+ ($id:ident, $uid:ident) => { -+ impl Shuffle1Dyn for $id { -+ type Indices = $uid; -+ #[inline] -+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { -+ unsafe { -+ let u: $uid = crate::mem::transmute(self); -+ crate::mem::transmute(u.shuffle1_dyn(indices)) -+ } -+ } -+ } -+ }; -+} -+ -+impl_shuffle1_dyn_non_u!(i8x2, u8x2); -+impl_shuffle1_dyn_non_u!(i8x4, u8x4); -+impl_shuffle1_dyn_non_u!(i8x8, u8x8); -+impl_shuffle1_dyn_non_u!(i8x16, u8x16); -+impl_shuffle1_dyn_non_u!(i8x32, u8x32); -+impl_shuffle1_dyn_non_u!(i8x64, u8x64); -+ -+impl_shuffle1_dyn_non_u!(i16x2, u16x2); -+impl_shuffle1_dyn_non_u!(i16x4, u16x4); -+impl_shuffle1_dyn_non_u!(i16x8, u16x8); -+impl_shuffle1_dyn_non_u!(i16x16, u16x16); -+impl_shuffle1_dyn_non_u!(i16x32, u16x32); -+ -+impl_shuffle1_dyn_non_u!(i32x2, u32x2); -+impl_shuffle1_dyn_non_u!(i32x4, u32x4); -+impl_shuffle1_dyn_non_u!(i32x8, u32x8); -+impl_shuffle1_dyn_non_u!(i32x16, u32x16); -+ -+impl_shuffle1_dyn_non_u!(i64x2, u64x2); -+impl_shuffle1_dyn_non_u!(i64x4, u64x4); -+impl_shuffle1_dyn_non_u!(i64x8, u64x8); -+ -+impl_shuffle1_dyn_non_u!(isizex2, usizex2); -+impl_shuffle1_dyn_non_u!(isizex4, usizex4); -+impl_shuffle1_dyn_non_u!(isizex8, usizex8); -+ -+impl_shuffle1_dyn_non_u!(i128x1, u128x1); -+impl_shuffle1_dyn_non_u!(i128x2, u128x2); -+impl_shuffle1_dyn_non_u!(i128x4, u128x4); -+ -+impl_shuffle1_dyn_non_u!(m8x2, u8x2); -+impl_shuffle1_dyn_non_u!(m8x4, u8x4); -+impl_shuffle1_dyn_non_u!(m8x8, u8x8); -+impl_shuffle1_dyn_non_u!(m8x16, u8x16); -+impl_shuffle1_dyn_non_u!(m8x32, u8x32); -+impl_shuffle1_dyn_non_u!(m8x64, u8x64); -+ -+impl_shuffle1_dyn_non_u!(m16x2, u16x2); 
-+impl_shuffle1_dyn_non_u!(m16x4, u16x4); -+impl_shuffle1_dyn_non_u!(m16x8, u16x8); -+impl_shuffle1_dyn_non_u!(m16x16, u16x16); -+impl_shuffle1_dyn_non_u!(m16x32, u16x32); -+ -+impl_shuffle1_dyn_non_u!(m32x2, u32x2); -+impl_shuffle1_dyn_non_u!(m32x4, u32x4); -+impl_shuffle1_dyn_non_u!(m32x8, u32x8); -+impl_shuffle1_dyn_non_u!(m32x16, u32x16); -+ -+impl_shuffle1_dyn_non_u!(m64x2, u64x2); -+impl_shuffle1_dyn_non_u!(m64x4, u64x4); -+impl_shuffle1_dyn_non_u!(m64x8, u64x8); -+ -+impl_shuffle1_dyn_non_u!(msizex2, usizex2); -+impl_shuffle1_dyn_non_u!(msizex4, usizex4); -+impl_shuffle1_dyn_non_u!(msizex8, usizex8); -+ -+impl_shuffle1_dyn_non_u!(m128x1, u128x1); -+impl_shuffle1_dyn_non_u!(m128x2, u128x2); -+impl_shuffle1_dyn_non_u!(m128x4, u128x4); -+ -+impl_shuffle1_dyn_non_u!(f32x2, u32x2); -+impl_shuffle1_dyn_non_u!(f32x4, u32x4); -+impl_shuffle1_dyn_non_u!(f32x8, u32x8); -+impl_shuffle1_dyn_non_u!(f32x16, u32x16); -+ -+impl_shuffle1_dyn_non_u!(f64x2, u64x2); -+impl_shuffle1_dyn_non_u!(f64x4, u64x4); -+impl_shuffle1_dyn_non_u!(f64x8, u64x8); -+ -+// Implementation for non-unsigned vector types -+macro_rules! impl_shuffle1_dyn_ptr { -+ ($id:ident, $uid:ident) => { -+ impl Shuffle1Dyn for $id { -+ type Indices = $uid; -+ #[inline] -+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { -+ unsafe { -+ let u: $uid = crate::mem::transmute(self); -+ crate::mem::transmute(u.shuffle1_dyn(indices)) -+ } -+ } -+ } -+ }; -+} -+ -+impl_shuffle1_dyn_ptr!(cptrx2, usizex2); -+impl_shuffle1_dyn_ptr!(cptrx4, usizex4); -+impl_shuffle1_dyn_ptr!(cptrx8, usizex8); -+ -+impl_shuffle1_dyn_ptr!(mptrx2, usizex2); -+impl_shuffle1_dyn_ptr!(mptrx4, usizex4); -+impl_shuffle1_dyn_ptr!(mptrx8, usizex8); -diff --git a/third_party/rust/packed_simd/src/codegen/swap_bytes.rs b/third_party/rust/packed_simd/src/codegen/swap_bytes.rs -new file mode 100644 -index 000000000000..b435fb5da120 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/swap_bytes.rs -@@ -0,0 +1,189 @@ -+//! 
Horizontal swap bytes reductions. -+ -+// FIXME: investigate using `llvm.bswap` -+// https://github.com/rust-lang-nursery/packed_simd/issues/19 -+ -+use crate::*; -+ -+crate trait SwapBytes { -+ fn swap_bytes(self) -> Self; -+} -+ -+macro_rules! impl_swap_bytes { -+ (v16: $($id:ident,)+) => { -+ $( -+ impl SwapBytes for $id { -+ #[inline] -+ fn swap_bytes(self) -> Self { -+ unsafe { shuffle!(self, [1, 0]) } -+ } -+ } -+ )+ -+ }; -+ (v32: $($id:ident,)+) => { -+ $( -+ impl SwapBytes for $id { -+ #[inline] -+ #[allow(clippy::useless_transmute)] -+ fn swap_bytes(self) -> Self { -+ unsafe { -+ let bytes: u8x4 = crate::mem::transmute(self); -+ let result: u8x4 = shuffle!(bytes, [3, 2, 1, 0]); -+ crate::mem::transmute(result) -+ } -+ } -+ } -+ )+ -+ }; -+ (v64: $($id:ident,)+) => { -+ $( -+ impl SwapBytes for $id { -+ #[inline] -+ #[allow(clippy::useless_transmute)] -+ fn swap_bytes(self) -> Self { -+ unsafe { -+ let bytes: u8x8 = crate::mem::transmute(self); -+ let result: u8x8 = shuffle!( -+ bytes, [7, 6, 5, 4, 3, 2, 1, 0] -+ ); -+ crate::mem::transmute(result) -+ } -+ } -+ } -+ )+ -+ }; -+ (v128: $($id:ident,)+) => { -+ $( -+ impl SwapBytes for $id { -+ #[inline] -+ #[allow(clippy::useless_transmute)] -+ fn swap_bytes(self) -> Self { -+ unsafe { -+ let bytes: u8x16 = crate::mem::transmute(self); -+ let result: u8x16 = shuffle!(bytes, [ -+ 15, 14, 13, 12, 11, 10, 9, 8, -+ 7, 6, 5, 4, 3, 2, 1, 0 -+ ]); -+ crate::mem::transmute(result) -+ } -+ } -+ } -+ )+ -+ }; -+ (v256: $($id:ident,)+) => { -+ $( -+ impl SwapBytes for $id { -+ #[inline] -+ #[allow(clippy::useless_transmute)] -+ fn swap_bytes(self) -> Self { -+ unsafe { -+ let bytes: u8x32 = crate::mem::transmute(self); -+ let result: u8x32 = shuffle!(bytes, [ -+ 31, 30, 29, 28, 27, 26, 25, 24, -+ 23, 22, 21, 20, 19, 18, 17, 16, -+ 15, 14, 13, 12, 11, 10, 9, 8, -+ 7, 6, 5, 4, 3, 2, 1, 0 -+ ]); -+ crate::mem::transmute(result) -+ } -+ } -+ } -+ )+ -+ }; -+ (v512: $($id:ident,)+) => { -+ $( -+ impl SwapBytes for $id { -+ 
#[inline] -+ #[allow(clippy::useless_transmute)] -+ fn swap_bytes(self) -> Self { -+ unsafe { -+ let bytes: u8x64 = crate::mem::transmute(self); -+ let result: u8x64 = shuffle!(bytes, [ -+ 63, 62, 61, 60, 59, 58, 57, 56, -+ 55, 54, 53, 52, 51, 50, 49, 48, -+ 47, 46, 45, 44, 43, 42, 41, 40, -+ 39, 38, 37, 36, 35, 34, 33, 32, -+ 31, 30, 29, 28, 27, 26, 25, 24, -+ 23, 22, 21, 20, 19, 18, 17, 16, -+ 15, 14, 13, 12, 11, 10, 9, 8, -+ 7, 6, 5, 4, 3, 2, 1, 0 -+ ]); -+ crate::mem::transmute(result) -+ } -+ } -+ } -+ )+ -+ }; -+} -+ -+impl_swap_bytes!(v16: u8x2, i8x2,); -+impl_swap_bytes!(v32: u8x4, i8x4, u16x2, i16x2,); -+// FIXME: 64-bit single element vector -+impl_swap_bytes!( -+ v64: u8x8, -+ i8x8, -+ u16x4, -+ i16x4, -+ u32x2, -+ i32x2, /* u64x1, i64x1, */ -+); -+ -+impl_swap_bytes!( -+ v128: u8x16, -+ i8x16, -+ u16x8, -+ i16x8, -+ u32x4, -+ i32x4, -+ u64x2, -+ i64x2, -+ u128x1, -+ i128x1, -+); -+impl_swap_bytes!( -+ v256: u8x32, -+ i8x32, -+ u16x16, -+ i16x16, -+ u32x8, -+ i32x8, -+ u64x4, -+ i64x4, -+ u128x2, -+ i128x2, -+); -+ -+impl_swap_bytes!( -+ v512: u8x64, -+ i8x64, -+ u16x32, -+ i16x32, -+ u32x16, -+ i32x16, -+ u64x8, -+ i64x8, -+ u128x4, -+ i128x4, -+); -+ -+cfg_if! 
{ -+ if #[cfg(target_pointer_width = "8")] { -+ impl_swap_bytes!(v16: isizex2, usizex2,); -+ impl_swap_bytes!(v32: isizex4, usizex4,); -+ impl_swap_bytes!(v64: isizex8, usizex8,); -+ } else if #[cfg(target_pointer_width = "16")] { -+ impl_swap_bytes!(v32: isizex2, usizex2,); -+ impl_swap_bytes!(v64: isizex4, usizex4,); -+ impl_swap_bytes!(v128: isizex8, usizex8,); -+ } else if #[cfg(target_pointer_width = "32")] { -+ impl_swap_bytes!(v64: isizex2, usizex2,); -+ impl_swap_bytes!(v128: isizex4, usizex4,); -+ impl_swap_bytes!(v256: isizex8, usizex8,); -+ } else if #[cfg(target_pointer_width = "64")] { -+ impl_swap_bytes!(v128: isizex2, usizex2,); -+ impl_swap_bytes!(v256: isizex4, usizex4,); -+ impl_swap_bytes!(v512: isizex8, usizex8,); -+ } else { -+ compile_error!("unsupported target_pointer_width"); -+ } -+} -diff --git a/third_party/rust/packed_simd/src/codegen/v128.rs b/third_party/rust/packed_simd/src/codegen/v128.rs -new file mode 100644 -index 000000000000..9506424fadad ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/v128.rs -@@ -0,0 +1,46 @@ -+//! 
Internal 128-bit wide vector types -+ -+use crate::masks::*; -+ -+#[rustfmt::skip] -+impl_simd_array!( -+ [i8; 16]: i8x16 | -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [u8; 16]: u8x16 | -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [m8; 16]: m8x16 | -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8 -+); -+ -+impl_simd_array!([i16; 8]: i16x8 | i16, i16, i16, i16, i16, i16, i16, i16); -+impl_simd_array!([u16; 8]: u16x8 | u16, u16, u16, u16, u16, u16, u16, u16); -+impl_simd_array!([m16; 8]: m16x8 | i16, i16, i16, i16, i16, i16, i16, i16); -+ -+impl_simd_array!([i32; 4]: i32x4 | i32, i32, i32, i32); -+impl_simd_array!([u32; 4]: u32x4 | u32, u32, u32, u32); -+impl_simd_array!([f32; 4]: f32x4 | f32, f32, f32, f32); -+impl_simd_array!([m32; 4]: m32x4 | i32, i32, i32, i32); -+ -+impl_simd_array!([i64; 2]: i64x2 | i64, i64); -+impl_simd_array!([u64; 2]: u64x2 | u64, u64); -+impl_simd_array!([f64; 2]: f64x2 | f64, f64); -+impl_simd_array!([m64; 2]: m64x2 | i64, i64); -+ -+impl_simd_array!([i128; 1]: i128x1 | i128); -+impl_simd_array!([u128; 1]: u128x1 | u128); -+impl_simd_array!([m128; 1]: m128x1 | i128); -diff --git a/third_party/rust/packed_simd/src/codegen/v16.rs b/third_party/rust/packed_simd/src/codegen/v16.rs -new file mode 100644 -index 000000000000..4d55a6d8998e ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/v16.rs -@@ -0,0 +1,7 @@ -+//! 
Internal 16-bit wide vector types -+ -+use crate::masks::*; -+ -+impl_simd_array!([i8; 2]: i8x2 | i8, i8); -+impl_simd_array!([u8; 2]: u8x2 | u8, u8); -+impl_simd_array!([m8; 2]: m8x2 | i8, i8); -diff --git a/third_party/rust/packed_simd/src/codegen/v256.rs b/third_party/rust/packed_simd/src/codegen/v256.rs -new file mode 100644 -index 000000000000..5ca4759f0c0a ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/v256.rs -@@ -0,0 +1,78 @@ -+//! Internal 256-bit wide vector types -+ -+use crate::masks::*; -+ -+#[rustfmt::skip] -+impl_simd_array!( -+ [i8; 32]: i8x32 | -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [u8; 32]: u8x32 | -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [m8; 32]: m8x32 | -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [i16; 16]: i16x16 | -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [u16; 16]: u16x16 | -+ u16, u16, u16, u16, -+ u16, u16, u16, u16, -+ u16, u16, u16, u16, -+ u16, u16, u16, u16 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [m16; 16]: m16x16 | -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16 -+); -+ -+impl_simd_array!([i32; 8]: i32x8 | i32, i32, i32, i32, i32, i32, i32, i32); -+impl_simd_array!([u32; 8]: u32x8 | u32, u32, u32, u32, u32, u32, u32, u32); -+impl_simd_array!([f32; 8]: f32x8 | f32, f32, f32, f32, f32, f32, f32, f32); -+impl_simd_array!([m32; 8]: m32x8 | i32, i32, i32, i32, i32, i32, i32, i32); -+ -+impl_simd_array!([i64; 4]: i64x4 
| i64, i64, i64, i64); -+impl_simd_array!([u64; 4]: u64x4 | u64, u64, u64, u64); -+impl_simd_array!([f64; 4]: f64x4 | f64, f64, f64, f64); -+impl_simd_array!([m64; 4]: m64x4 | i64, i64, i64, i64); -+ -+impl_simd_array!([i128; 2]: i128x2 | i128, i128); -+impl_simd_array!([u128; 2]: u128x2 | u128, u128); -+impl_simd_array!([m128; 2]: m128x2 | i128, i128); -diff --git a/third_party/rust/packed_simd/src/codegen/v32.rs b/third_party/rust/packed_simd/src/codegen/v32.rs -new file mode 100644 -index 000000000000..ae1dabd00c22 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/v32.rs -@@ -0,0 +1,11 @@ -+//! Internal 32-bit wide vector types -+ -+use crate::masks::*; -+ -+impl_simd_array!([i8; 4]: i8x4 | i8, i8, i8, i8); -+impl_simd_array!([u8; 4]: u8x4 | u8, u8, u8, u8); -+impl_simd_array!([m8; 4]: m8x4 | i8, i8, i8, i8); -+ -+impl_simd_array!([i16; 2]: i16x2 | i16, i16); -+impl_simd_array!([u16; 2]: u16x2 | u16, u16); -+impl_simd_array!([m16; 2]: m16x2 | i16, i16); -diff --git a/third_party/rust/packed_simd/src/codegen/v512.rs b/third_party/rust/packed_simd/src/codegen/v512.rs -new file mode 100644 -index 000000000000..bf95110340d6 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/v512.rs -@@ -0,0 +1,145 @@ -+//! 
Internal 512-bit wide vector types -+ -+use crate::masks::*; -+ -+#[rustfmt::skip] -+impl_simd_array!( -+ [i8; 64]: i8x64 | -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [u8; 64]: u8x64 | -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8, -+ u8, u8, u8, u8 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [m8; 64]: m8x64 | -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8, -+ i8, i8, i8, i8 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [i16; 32]: i16x32 | -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [u16; 32]: u16x32 | -+ u16, u16, u16, u16, -+ u16, u16, u16, u16, -+ u16, u16, u16, u16, -+ u16, u16, u16, u16, -+ u16, u16, u16, u16, -+ u16, u16, u16, u16, -+ u16, u16, u16, u16, -+ u16, u16, u16, u16 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [m16; 32]: m16x32 | -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16, -+ i16, i16, i16, i16 -+); -+ -+#[rustfmt::skip] -+impl_simd_array!( -+ [i32; 16]: i32x16 | -+ i32, 
i32, i32, i32, -+ i32, i32, i32, i32, -+ i32, i32, i32, i32, -+ i32, i32, i32, i32 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [u32; 16]: u32x16 | -+ u32, u32, u32, u32, -+ u32, u32, u32, u32, -+ u32, u32, u32, u32, -+ u32, u32, u32, u32 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [f32; 16]: f32x16 | -+ f32, f32, f32, f32, -+ f32, f32, f32, f32, -+ f32, f32, f32, f32, -+ f32, f32, f32, f32 -+); -+#[rustfmt::skip] -+impl_simd_array!( -+ [m32; 16]: m32x16 | -+ i32, i32, i32, i32, -+ i32, i32, i32, i32, -+ i32, i32, i32, i32, -+ i32, i32, i32, i32 -+); -+ -+impl_simd_array!([i64; 8]: i64x8 | i64, i64, i64, i64, i64, i64, i64, i64); -+impl_simd_array!([u64; 8]: u64x8 | u64, u64, u64, u64, u64, u64, u64, u64); -+impl_simd_array!([f64; 8]: f64x8 | f64, f64, f64, f64, f64, f64, f64, f64); -+impl_simd_array!([m64; 8]: m64x8 | i64, i64, i64, i64, i64, i64, i64, i64); -+ -+impl_simd_array!([i128; 4]: i128x4 | i128, i128, i128, i128); -+impl_simd_array!([u128; 4]: u128x4 | u128, u128, u128, u128); -+impl_simd_array!([m128; 4]: m128x4 | i128, i128, i128, i128); -diff --git a/third_party/rust/packed_simd/src/codegen/v64.rs b/third_party/rust/packed_simd/src/codegen/v64.rs -new file mode 100644 -index 000000000000..3cfb67c1a013 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/v64.rs -@@ -0,0 +1,21 @@ -+//! 
Internal 64-bit wide vector types -+ -+use crate::masks::*; -+ -+impl_simd_array!([i8; 8]: i8x8 | i8, i8, i8, i8, i8, i8, i8, i8); -+impl_simd_array!([u8; 8]: u8x8 | u8, u8, u8, u8, u8, u8, u8, u8); -+impl_simd_array!([m8; 8]: m8x8 | i8, i8, i8, i8, i8, i8, i8, i8); -+ -+impl_simd_array!([i16; 4]: i16x4 | i16, i16, i16, i16); -+impl_simd_array!([u16; 4]: u16x4 | u16, u16, u16, u16); -+impl_simd_array!([m16; 4]: m16x4 | i16, i16, i16, i16); -+ -+impl_simd_array!([i32; 2]: i32x2 | i32, i32); -+impl_simd_array!([u32; 2]: u32x2 | u32, u32); -+impl_simd_array!([f32; 2]: f32x2 | f32, f32); -+impl_simd_array!([m32; 2]: m32x2 | i32, i32); -+ -+impl_simd_array!([i64; 1]: i64x1 | i64); -+impl_simd_array!([u64; 1]: u64x1 | u64); -+impl_simd_array!([f64; 1]: f64x1 | f64); -+impl_simd_array!([m64; 1]: m64x1 | i64); -diff --git a/third_party/rust/packed_simd/src/codegen/vPtr.rs b/third_party/rust/packed_simd/src/codegen/vPtr.rs -new file mode 100644 -index 000000000000..1f2bc7714dd9 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/vPtr.rs -@@ -0,0 +1,33 @@ -+//! Pointer vector types -+ -+macro_rules! 
impl_simd_ptr { -+ ([$ptr_ty:ty; $elem_count:expr]: $tuple_id:ident | $ty:ident -+ | $($tys:ty),*) => { -+ #[derive(Copy, Clone)] -+ #[repr(simd)] -+ pub struct $tuple_id<$ty>($(crate $tys),*); -+ //^^^^^^^ leaked through SimdArray -+ -+ impl<$ty> crate::sealed::SimdArray for [$ptr_ty; $elem_count] { -+ type Tuple = $tuple_id<$ptr_ty>; -+ type T = $ptr_ty; -+ const N: usize = $elem_count; -+ type NT = [u32; $elem_count]; -+ } -+ -+ impl<$ty> crate::sealed::Simd for $tuple_id<$ptr_ty> { -+ type Element = $ptr_ty; -+ const LANES: usize = $elem_count; -+ type LanesType = [u32; $elem_count]; -+ } -+ -+ } -+} -+ -+impl_simd_ptr!([*const T; 2]: cptrx2 | T | T, T); -+impl_simd_ptr!([*const T; 4]: cptrx4 | T | T, T, T, T); -+impl_simd_ptr!([*const T; 8]: cptrx8 | T | T, T, T, T, T, T, T, T); -+ -+impl_simd_ptr!([*mut T; 2]: mptrx2 | T | T, T); -+impl_simd_ptr!([*mut T; 4]: mptrx4 | T | T, T, T, T); -+impl_simd_ptr!([*mut T; 8]: mptrx8 | T | T, T, T, T, T, T, T, T); -diff --git a/third_party/rust/packed_simd/src/codegen/vSize.rs b/third_party/rust/packed_simd/src/codegen/vSize.rs -new file mode 100644 -index 000000000000..3911b21340c8 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/codegen/vSize.rs -@@ -0,0 +1,43 @@ -+//! 
Vector types with pointer-sized elements -+ -+use crate::codegen::pointer_sized_int::{isize_, usize_}; -+use crate::masks::*; -+ -+impl_simd_array!([isize; 2]: isizex2 | isize_, isize_); -+impl_simd_array!([usize; 2]: usizex2 | usize_, usize_); -+impl_simd_array!([msize; 2]: msizex2 | isize_, isize_); -+ -+impl_simd_array!([isize; 4]: isizex4 | isize_, isize_, isize_, isize_); -+impl_simd_array!([usize; 4]: usizex4 | usize_, usize_, usize_, usize_); -+impl_simd_array!([msize; 4]: msizex4 | isize_, isize_, isize_, isize_); -+ -+impl_simd_array!( -+ [isize; 8]: isizex8 | isize_, -+ isize_, -+ isize_, -+ isize_, -+ isize_, -+ isize_, -+ isize_, -+ isize_ -+); -+impl_simd_array!( -+ [usize; 8]: usizex8 | usize_, -+ usize_, -+ usize_, -+ usize_, -+ usize_, -+ usize_, -+ usize_, -+ usize_ -+); -+impl_simd_array!( -+ [msize; 8]: msizex8 | isize_, -+ isize_, -+ isize_, -+ isize_, -+ isize_, -+ isize_, -+ isize_, -+ isize_ -+); -diff --git a/third_party/rust/packed_simd/src/lib.rs b/third_party/rust/packed_simd/src/lib.rs -new file mode 100644 -index 000000000000..d73645e72fbe ---- /dev/null -+++ b/third_party/rust/packed_simd/src/lib.rs -@@ -0,0 +1,327 @@ -+//! # Portable packed SIMD vectors -+//! -+//! This crate is proposed for stabilization as `std::packed_simd` in [RFC2366: -+//! `std::simd`](https://github.com/rust-lang/rfcs/pull/2366) . -+//! -+//! The examples available in the -+//! [`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples) -+//! sub-directory of the crate showcase how to use the library in practice. -+//! -+//! ## Table of contents -+//! -+//! - [Introduction](#introduction) -+//! - [Vector types](#vector-types) -+//! - [Conditional operations](#conditional-operations) -+//! - [Conversions](#conversions) -+//! - [Performance -+//! guide](https://rust-lang-nursery.github.io/packed_simd/perf-guide/) -+//! -+//! ## Introduction -+//! -+//! This crate exports [`Simd<[T; N]>`][`Simd`]: a packed vector of `N` -+//! 
elements of type `T` as well as many type aliases for this type: for -+//! example, [`f32x4`], which is just an alias for `Simd<[f32; 4]>`. -+//! -+//! The operations on packed vectors are, by default, "vertical", that is, they -+//! are applied to each vector lane in isolation of the others: -+//! -+//! ``` -+//! # use packed_simd::*; -+//! let a = i32x4::new(1, 2, 3, 4); -+//! let b = i32x4::new(5, 6, 7, 8); -+//! assert_eq!(a + b, i32x4::new(6, 8, 10, 12)); -+//! ``` -+//! -+//! Many "horizontal" operations are also provided: -+//! -+//! ``` -+//! # use packed_simd::*; -+//! # let a = i32x4::new(1, 2, 3, 4); -+//! assert_eq!(a.wrapping_sum(), 10); -+//! ``` -+//! -+//! In virtually all architectures vertical operations are fast, while -+//! horizontal operations are, by comparison, much slower. That is, the -+//! most portably-efficient way of performing a reduction over a slice -+//! is to collect the results into a vector using vertical operations, -+//! and performing a single horizontal operation at the end: -+//! -+//! ``` -+//! # use packed_simd::*; -+//! fn reduce(x: &[i32]) -> i32 { -+//! assert!(x.len() % 4 == 0); -+//! let mut sum = i32x4::splat(0); // [0, 0, 0, 0] -+//! for i in (0..x.len()).step_by(4) { -+//! sum += i32x4::from_slice_unaligned(&x[i..]); -+//! } -+//! sum.wrapping_sum() -+//! } -+//! -+//! let x = [0, 1, 2, 3, 4, 5, 6, 7]; -+//! assert_eq!(reduce(&x), 28); -+//! ``` -+//! -+//! ## Vector types -+//! -+//! The vector type aliases are named according to the following scheme: -+//! -+//! > `{element_type}x{number_of_lanes} == Simd<[element_type; -+//! number_of_lanes]>` -+//! -+//! where the following element types are supported: -+//! -+//! * `i{element_width}`: signed integer -+//! * `u{element_width}`: unsigned integer -+//! * `f{element_width}`: float -+//! * `m{element_width}`: mask (see below) -+//! * `*{const,mut} T`: `const` and `mut` pointers -+//! -+//! ## Basic operations -+//! -+//! ``` -+//! # use packed_simd::*; -+//! 
// Sets all elements to `0`: -+//! let a = i32x4::splat(0); -+//! -+//! // Reads a vector from a slice: -+//! let mut arr = [0, 0, 0, 1, 2, 3, 4, 5]; -+//! let b = i32x4::from_slice_unaligned(&arr); -+//! -+//! // Reads the 4-th element of a vector: -+//! assert_eq!(b.extract(3), 1); -+//! -+//! // Returns a new vector where the 4-th element is replaced with `1`: -+//! let a = a.replace(3, 1); -+//! assert_eq!(a, b); -+//! -+//! // Writes a vector to a slice: -+//! let a = a.replace(2, 1); -+//! a.write_to_slice_unaligned(&mut arr[4..]); -+//! assert_eq!(arr, [0, 0, 0, 1, 0, 0, 1, 1]); -+//! ``` -+//! -+//! ## Conditional operations -+//! -+//! One often needs to perform an operation on some lanes of the vector. Vector -+//! masks, like `m32x4`, allow selecting on which vector lanes an operation is -+//! to be performed: -+//! -+//! ``` -+//! # use packed_simd::*; -+//! let a = i32x4::new(1, 1, 2, 2); -+//! -+//! // Add `1` to the first two lanes of the vector. -+//! let m = m16x4::new(true, true, false, false); -+//! let a = m.select(a + 1, a); -+//! assert_eq!(a, i32x4::splat(2)); -+//! ``` -+//! -+//! The elements of a vector mask are either `true` or `false`. Here `true` -+//! means that a lane is "selected", while `false` means that a lane is not -+//! selected. -+//! -+//! All vector masks implement a `mask.select(a: T, b: T) -> T` method that -+//! works on all vectors that have the same number of lanes as the mask. The -+//! resulting vector contains the elements of `a` for those lanes for which the -+//! mask is `true`, and the elements of `b` otherwise. -+//! -+//! The example constructs a mask with the first two lanes set to `true` and -+//! the last two lanes set to `false`. This selects the first two lanes of `a + -+//! 1` and the last two lanes of `a`, producing a vector where the first two -+//! lanes have been incremented by `1`. -+//! -+//! > note: mask `select` can be used on vector types that have the same number -+//! > of lanes as the mask. 
The example shows this by using [`m16x4`] instead -+//! > of [`m32x4`]. It is _typically_ more performant to use a mask element -+//! > width equal to the element width of the vectors being operated upon. -+//! > This is, however, not true for 512-bit wide vectors when targetting -+//! > AVX-512, where the most efficient masks use only 1-bit per element. -+//! -+//! All vertical comparison operations returns masks: -+//! -+//! ``` -+//! # use packed_simd::*; -+//! let a = i32x4::new(1, 1, 3, 3); -+//! let b = i32x4::new(2, 2, 0, 0); -+//! -+//! // ge: >= (Greater Eequal; see also lt, le, gt, eq, ne). -+//! let m = a.ge(i32x4::splat(2)); -+//! -+//! if m.any() { -+//! // all / any / none allow coherent control flow -+//! let d = m.select(a, b); -+//! assert_eq!(d, i32x4::new(2, 2, 3, 3)); -+//! } -+//! ``` -+//! -+//! ## Conversions -+//! -+//! * **lossless widening conversions**: [`From`]/[`Into`] are implemented for -+//! vectors with the same number of lanes when the conversion is value -+//! preserving (same as in `std`). -+//! -+//! * **safe bitwise conversions**: The cargo feature `into_bits` provides the -+//! `IntoBits/FromBits` traits (`x.into_bits()`). These perform safe bitwise -+//! `transmute`s when all bit patterns of the source type are valid bit -+//! patterns of the target type and are also implemented for the -+//! architecture-specific vector types of `std::arch`. For example, `let x: -+//! u8x8 = m8x8::splat(true).into_bits();` is provided because all `m8x8` bit -+//! patterns are valid `u8x8` bit patterns. However, the opposite is not -+//! true, not all `u8x8` bit patterns are valid `m8x8` bit-patterns, so this -+//! operation cannot be peformed safely using `x.into_bits()`; one needs to -+//! use `unsafe { crate::mem::transmute(x) }` for that, making sure that the -+//! value in the `u8x8` is a valid bit-pattern of `m8x8`. -+//! -+//! * **numeric casts** (`as`): are peformed using [`FromCast`]/[`Cast`] -+//! (`x.cast()`), just like `as`: -+//! 
-+//! * casting integer vectors whose lane types have the same size (e.g. -+//! `i32xN` -> `u32xN`) is a **no-op**, -+//! -+//! * casting from a larger integer to a smaller integer (e.g. `u32xN` -> -+//! `u8xN`) will **truncate**, -+//! -+//! * casting from a smaller integer to a larger integer (e.g. `u8xN` -> -+//! `u32xN`) will: -+//! * **zero-extend** if the source is unsigned, or -+//! * **sign-extend** if the source is signed, -+//! -+//! * casting from a float to an integer will **round the float towards -+//! zero**, -+//! -+//! * casting from an integer to float will produce the floating point -+//! representation of the integer, **rounding to nearest, ties to even**, -+//! -+//! * casting from an `f32` to an `f64` is perfect and lossless, -+//! -+//! * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. -+//! -+//! Numeric casts are not very "precise": sometimes lossy, sometimes value -+//! preserving, etc. -+ -+#![feature( -+ repr_simd, -+ const_fn, -+ platform_intrinsics, -+ stdsimd, -+ aarch64_target_feature, -+ arm_target_feature, -+ link_llvm_intrinsics, -+ core_intrinsics, -+ stmt_expr_attributes, -+ align_offset, -+ mmx_target_feature, -+ crate_visibility_modifier, -+ custom_inner_attributes -+)] -+#![allow(non_camel_case_types, non_snake_case, -+ clippy::cast_possible_truncation, -+ clippy::cast_lossless, -+ clippy::cast_possible_wrap, -+ clippy::cast_precision_loss, -+ // This lint is currently broken for generic code -+ // See https://github.com/rust-lang/rust-clippy/issues/3410 -+ clippy::use_self -+)] -+#![cfg_attr(test, feature(hashmap_internals))] -+#![deny(warnings, rust_2018_idioms, clippy::missing_inline_in_public_items)] -+#![no_std] -+ -+use cfg_if::cfg_if; -+ -+cfg_if! 
{ -+ if #[cfg(feature = "core_arch")] { -+ #[allow(unused_imports)] -+ use core_arch as arch; -+ } else { -+ #[allow(unused_imports)] -+ use core::arch; -+ } -+} -+ -+#[cfg(all(target_arch = "wasm32", test))] -+use wasm_bindgen_test::*; -+ -+#[allow(unused_imports)] -+use core::{ -+ /* arch (handled above), */ cmp, f32, f64, fmt, hash, hint, i128, -+ i16, i32, i64, i8, intrinsics, isize, iter, marker, mem, ops, ptr, slice, -+ u128, u16, u32, u64, u8, usize, -+}; -+ -+#[macro_use] -+mod testing; -+#[macro_use] -+mod api; -+mod codegen; -+mod sealed; -+ -+/// Packed SIMD vector type. -+/// -+/// # Examples -+/// -+/// ``` -+/// # use packed_simd::Simd; -+/// let v = Simd::<[i32; 4]>::new(0, 1, 2, 3); -+/// assert_eq!(v.extract(2), 2); -+/// ``` -+#[repr(transparent)] -+#[derive(Copy, Clone)] -+pub struct Simd( -+ // FIXME: this type should be private, -+ // but it currently must be public for the -+ // `shuffle!` macro to work: it needs to -+ // access the internal `repr(simd)` type -+ // to call the shuffle intrinsics. -+ #[doc(hidden)] pub ::Tuple, -+); -+ -+/// Wrapper over `T` implementing a lexicoraphical order via the `PartialOrd` -+/// and/or `Ord` traits. -+#[repr(transparent)] -+#[derive(Copy, Clone, Debug)] -+#[allow(clippy::missing_inline_in_public_items)] -+pub struct LexicographicallyOrdered(T); -+ -+mod masks; -+pub use self::masks::*; -+ -+mod v16; -+pub use self::v16::*; -+ -+mod v32; -+pub use self::v32::*; -+ -+mod v64; -+pub use self::v64::*; -+ -+mod v128; -+pub use self::v128::*; -+ -+mod v256; -+pub use self::v256::*; -+ -+mod v512; -+pub use self::v512::*; -+ -+mod vSize; -+pub use self::vSize::*; -+ -+mod vPtr; -+pub use self::vPtr::*; -+ -+pub use self::api::cast::*; -+ -+#[cfg(feature = "into_bits")] -+pub use self::api::into_bits::*; -+ -+// Re-export the shuffle intrinsics required by the `shuffle!` macro. 
-+#[doc(hidden)] -+pub use self::codegen::llvm::{ -+ __shuffle_vector16, __shuffle_vector2, __shuffle_vector32, -+ __shuffle_vector4, __shuffle_vector64, __shuffle_vector8, -+}; -+ -+crate mod llvm { -+ crate use crate::codegen::llvm::*; -+} -diff --git a/third_party/rust/packed_simd/src/masks.rs b/third_party/rust/packed_simd/src/masks.rs -new file mode 100644 -index 000000000000..f83c4da95750 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/masks.rs -@@ -0,0 +1,128 @@ -+//! Mask types -+ -+macro_rules! impl_mask_ty { -+ ($id:ident : $elem_ty:ident | #[$doc:meta]) => { -+ #[$doc] -+ #[derive(Copy, Clone)] -+ pub struct $id($elem_ty); -+ -+ impl crate::sealed::Mask for $id { -+ fn test(&self) -> bool { -+ $id::test(self) -+ } -+ } -+ -+ impl $id { -+ /// Instantiate a mask with `value` -+ #[inline] -+ pub fn new(x: bool) -> Self { -+ if x { -+ $id(!0) -+ } else { -+ $id(0) -+ } -+ } -+ /// Test if the mask is set -+ #[inline] -+ pub fn test(&self) -> bool { -+ self.0 != 0 -+ } -+ } -+ -+ impl Default for $id { -+ #[inline] -+ fn default() -> Self { -+ $id(0) -+ } -+ } -+ -+ #[allow(clippy::partialeq_ne_impl)] -+ impl PartialEq<$id> for $id { -+ #[inline] -+ fn eq(&self, other: &Self) -> bool { -+ self.0 == other.0 -+ } -+ #[inline] -+ fn ne(&self, other: &Self) -> bool { -+ self.0 != other.0 -+ } -+ } -+ -+ impl Eq for $id {} -+ -+ impl PartialOrd<$id> for $id { -+ #[inline] -+ fn partial_cmp( -+ &self, other: &Self, -+ ) -> Option { -+ use crate::cmp::Ordering; -+ if self == other { -+ Some(Ordering::Equal) -+ } else if self.0 > other.0 { -+ // Note: -+ // * false = 0_i -+ // * true == !0_i == -1_i -+ Some(Ordering::Less) -+ } else { -+ Some(Ordering::Greater) -+ } -+ } -+ -+ #[inline] -+ fn lt(&self, other: &Self) -> bool { -+ self.0 > other.0 -+ } -+ #[inline] -+ fn gt(&self, other: &Self) -> bool { -+ self.0 < other.0 -+ } -+ #[inline] -+ fn le(&self, other: &Self) -> bool { -+ self.0 >= other.0 -+ } -+ #[inline] -+ fn ge(&self, other: &Self) -> bool { -+ 
self.0 <= other.0 -+ } -+ } -+ -+ impl Ord for $id { -+ #[inline] -+ fn cmp(&self, other: &Self) -> crate::cmp::Ordering { -+ match self.partial_cmp(other) { -+ Some(x) => x, -+ None => unsafe { crate::hint::unreachable_unchecked() }, -+ } -+ } -+ } -+ -+ impl crate::hash::Hash for $id { -+ #[inline] -+ fn hash(&self, state: &mut H) { -+ (self.0 != 0).hash(state); -+ } -+ } -+ -+ impl crate::fmt::Debug for $id { -+ #[inline] -+ fn fmt( -+ &self, fmtter: &mut crate::fmt::Formatter<'_>, -+ ) -> Result<(), crate::fmt::Error> { -+ write!(fmtter, "{}({})", stringify!($id), self.0 != 0) -+ } -+ } -+ }; -+} -+ -+impl_mask_ty!(m8: i8 | /// 8-bit wide mask. -+); -+impl_mask_ty!(m16: i16 | /// 16-bit wide mask. -+); -+impl_mask_ty!(m32: i32 | /// 32-bit wide mask. -+); -+impl_mask_ty!(m64: i64 | /// 64-bit wide mask. -+); -+impl_mask_ty!(m128: i128 | /// 128-bit wide mask. -+); -+impl_mask_ty!(msize: isize | /// isize-wide mask. -+); -diff --git a/third_party/rust/packed_simd/src/sealed.rs b/third_party/rust/packed_simd/src/sealed.rs -new file mode 100644 -index 000000000000..832acd3f1d54 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/sealed.rs -@@ -0,0 +1,41 @@ -+//! Sealed traits -+ -+/// Trait implemented by arrays that can be SIMD types. -+#[doc(hidden)] -+pub trait SimdArray { -+ /// The type of the #[repr(simd)] type. -+ type Tuple: Copy + Clone; -+ /// The element type of the vector. -+ type T; -+ /// The number of elements in the array. -+ const N: usize; -+ /// The type: `[u32; Self::N]`. -+ type NT; -+} -+ -+/// This traits is used to constraint the arguments -+/// and result type of the portable shuffles. -+#[doc(hidden)] -+pub trait Shuffle { -+ // Lanes is a `[u32; N]` where `N` is the number of vector lanes -+ -+ /// The result type of the shuffle. -+ type Output; -+} -+ -+/// This trait is implemented by all SIMD vector types. 
-+#[doc(hidden)] -+pub trait Simd { -+ /// Element type of the SIMD vector -+ type Element; -+ /// The number of elements in the SIMD vector. -+ const LANES: usize; -+ /// The type: `[u32; Self::N]`. -+ type LanesType; -+} -+ -+/// This trait is implemented by all mask types -+#[doc(hidden)] -+pub trait Mask { -+ fn test(&self) -> bool; -+} -diff --git a/third_party/rust/packed_simd/src/testing.rs b/third_party/rust/packed_simd/src/testing.rs -new file mode 100644 -index 000000000000..fcbcf9e2ac8e ---- /dev/null -+++ b/third_party/rust/packed_simd/src/testing.rs -@@ -0,0 +1,8 @@ -+//! Testing macros and other utilities. -+ -+#[macro_use] -+mod macros; -+ -+#[cfg(test)] -+#[macro_use] -+crate mod utils; -diff --git a/third_party/rust/packed_simd/src/testing/macros.rs b/third_party/rust/packed_simd/src/testing/macros.rs -new file mode 100644 -index 000000000000..6008634c76ce ---- /dev/null -+++ b/third_party/rust/packed_simd/src/testing/macros.rs -@@ -0,0 +1,44 @@ -+//! Testing macros -+ -+macro_rules! test_if { -+ ($cfg_tt:tt: $it:item) => { -+ #[cfg(any( -+ // Test everything if: -+ // -+ // * tests are enabled, -+ // * no features about exclusively testing -+ // specific vector classes are enabled -+ all(test, not(any( -+ test_v16, -+ test_v32, -+ test_v64, -+ test_v128, -+ test_v256, -+ test_v512, -+ test_none, // disables all tests -+ ))), -+ // Test if: -+ // -+ // * tests are enabled -+ // * a particular cfg token tree returns true -+ all(test, $cfg_tt), -+ ))] -+ $it -+ }; -+} -+ -+#[cfg(test)] -+#[allow(unused)] -+macro_rules! ref_ { -+ ($anything:tt) => { -+ &$anything -+ }; -+} -+ -+#[cfg(test)] -+#[allow(unused)] -+macro_rules! ref_mut_ { -+ ($anything:tt) => { -+ &mut $anything -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/testing/utils.rs b/third_party/rust/packed_simd/src/testing/utils.rs -new file mode 100644 -index 000000000000..7b8f21ac1c55 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/testing/utils.rs -@@ -0,0 +1,135 @@ -+//! 
Testing utilities -+ -+#![allow(dead_code)] -+ -+use crate::{cmp::PartialOrd, fmt::Debug, LexicographicallyOrdered}; -+ -+/// Tests PartialOrd for `a` and `b` where `a < b` is true. -+pub fn test_lt( -+ a: LexicographicallyOrdered, b: LexicographicallyOrdered, -+) where -+ LexicographicallyOrdered: Debug + PartialOrd, -+{ -+ assert!(a < b, "{:?}, {:?}", a, b); -+ assert!(b > a, "{:?}, {:?}", a, b); -+ -+ assert!(!(a == b), "{:?}, {:?}", a, b); -+ assert!(a != b, "{:?}, {:?}", a, b); -+ -+ assert!(a <= b, "{:?}, {:?}", a, b); -+ assert!(b >= a, "{:?}, {:?}", a, b); -+ -+ // Irreflexivity -+ assert!(!(a < a), "{:?}, {:?}", a, b); -+ assert!(!(b < b), "{:?}, {:?}", a, b); -+ assert!(!(a > a), "{:?}, {:?}", a, b); -+ assert!(!(b > b), "{:?}, {:?}", a, b); -+ -+ assert!(a <= a, "{:?}, {:?}", a, b); -+ assert!(b <= b, "{:?}, {:?}", a, b); -+} -+ -+/// Tests PartialOrd for `a` and `b` where `a <= b` is true. -+pub fn test_le( -+ a: LexicographicallyOrdered, b: LexicographicallyOrdered, -+) where -+ LexicographicallyOrdered: Debug + PartialOrd, -+{ -+ assert!(a <= b, "{:?}, {:?}", a, b); -+ assert!(b >= a, "{:?}, {:?}", a, b); -+ -+ assert!(a == b || a < b, "{:?}, {:?}", a, b); -+ assert!(a == b || b > a, "{:?}, {:?}", a, b); -+ -+ if a == b { -+ assert!(!(a < b), "{:?}, {:?}", a, b); -+ assert!(!(b > a), "{:?}, {:?}", a, b); -+ -+ assert!(!(a != b), "{:?}, {:?}", a, b); -+ } else { -+ assert!(a != b, "{:?}, {:?}", a, b); -+ test_lt(a, b); -+ } -+} -+ -+/// Test PartialOrd::partial_cmp for `a` and `b` returning `Ordering` -+pub fn test_cmp( -+ a: LexicographicallyOrdered, b: LexicographicallyOrdered, -+ o: Option, -+) where -+ LexicographicallyOrdered: PartialOrd + Debug, -+ T: Debug + crate::sealed::Simd + Copy + Clone, -+ ::Element: Default + Copy + Clone + PartialOrd, -+{ -+ assert!(T::LANES <= 64, "array length in these two arrays needs updating"); -+ let mut arr_a: [T::Element; 64] = [Default::default(); 64]; -+ let mut arr_b: [T::Element; 64] = [Default::default(); 
64]; -+ -+ unsafe { -+ crate::ptr::write_unaligned( -+ arr_a.as_mut_ptr() as *mut LexicographicallyOrdered, -+ a, -+ ) -+ } -+ unsafe { -+ crate::ptr::write_unaligned( -+ arr_b.as_mut_ptr() as *mut LexicographicallyOrdered, -+ b, -+ ) -+ } -+ let expected = arr_a[0..T::LANES].partial_cmp(&arr_b[0..T::LANES]); -+ let result = a.partial_cmp(&b); -+ assert_eq!(expected, result, "{:?}, {:?}", a, b); -+ assert_eq!(o, result, "{:?}, {:?}", a, b); -+ match o { -+ Some(crate::cmp::Ordering::Less) => { -+ test_lt(a, b); -+ test_le(a, b); -+ } -+ Some(crate::cmp::Ordering::Greater) => { -+ test_lt(b, a); -+ test_le(b, a); -+ } -+ Some(crate::cmp::Ordering::Equal) => { -+ assert!(a == b, "{:?}, {:?}", a, b); -+ assert!(!(a != b), "{:?}, {:?}", a, b); -+ assert!(!(a < b), "{:?}, {:?}", a, b); -+ assert!(!(b < a), "{:?}, {:?}", a, b); -+ assert!(!(a > b), "{:?}, {:?}", a, b); -+ assert!(!(b > a), "{:?}, {:?}", a, b); -+ -+ test_le(a, b); -+ test_le(b, a); -+ } -+ None => { -+ assert!(!(a == b), "{:?}, {:?}", a, b); -+ assert!(!(a != b), "{:?}, {:?}", a, b); -+ assert!(!(a < b), "{:?}, {:?}", a, b); -+ assert!(!(a > b), "{:?}, {:?}", a, b); -+ assert!(!(b < a), "{:?}, {:?}", a, b); -+ assert!(!(b > a), "{:?}, {:?}", a, b); -+ assert!(!(a <= b), "{:?}, {:?}", a, b); -+ assert!(!(b <= a), "{:?}, {:?}", a, b); -+ assert!(!(a >= b), "{:?}, {:?}", a, b); -+ assert!(!(b >= a), "{:?}, {:?}", a, b); -+ } -+ } -+} -+ -+// Returns a tuple containing two distinct pointer values of the same type as -+// the element type of the Simd vector `$id`. -+#[allow(unused)] -+macro_rules! 
ptr_vals { -+ ($id:ty) => { -+ // expands to an expression -+ #[allow(unused_unsafe)] -+ unsafe { -+ // all bits cleared -+ let clear: <$id as sealed::Simd>::Element = crate::mem::zeroed(); -+ // all bits set -+ let set: <$id as sealed::Simd>::Element = -+ crate::mem::transmute(-1_isize); -+ (clear, set) -+ } -+ }; -+} -diff --git a/third_party/rust/packed_simd/src/v128.rs b/third_party/rust/packed_simd/src/v128.rs -new file mode 100644 -index 000000000000..1d0282dc4278 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/v128.rs -@@ -0,0 +1,80 @@ -+//! 128-bit wide vector types -+#![rustfmt::skip] -+ -+use crate::*; -+ -+impl_i!([i8; 16]: i8x16, m8x16 | i8 | test_v128 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | -+ From: | -+ /// A 128-bit vector with 16 `i8` lanes. -+); -+impl_u!([u8; 16]: u8x16, m8x16 | u8 | test_v128 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | -+ From: | -+ /// A 128-bit vector with 16 `u8` lanes. -+); -+impl_m!([m8; 16]: m8x16 | i8 | test_v128 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | -+ From: m16x16 | -+ /// A 128-bit vector mask with 16 `m8` lanes. -+); -+ -+impl_i!([i16; 8]: i16x8, m16x8 | i16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: i8x8, u8x8 | -+ /// A 128-bit vector with 8 `i16` lanes. -+); -+impl_u!([u16; 8]: u16x8, m16x8 | u16| test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: u8x8 | -+ /// A 128-bit vector with 8 `u16` lanes. -+); -+impl_m!([m16; 8]: m16x8 | i16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: m8x8, m32x8 | -+ /// A 128-bit vector mask with 8 `m16` lanes. -+); -+ -+impl_i!([i32; 4]: i32x4, m32x4 | i32 | test_v128 | x0, x1, x2, x3 | -+ From: i8x4, u8x4, i16x4, u16x4 | -+ /// A 128-bit vector with 4 `i32` lanes. -+); -+impl_u!([u32; 4]: u32x4, m32x4 | u32| test_v128 | x0, x1, x2, x3 | -+ From: u8x4, u16x4 | -+ /// A 128-bit vector with 4 `u32` lanes. 
-+); -+impl_f!([f32; 4]: f32x4, m32x4 | f32 | test_v128 | x0, x1, x2, x3 | -+ From: i8x4, u8x4, i16x4, u16x4 | -+ /// A 128-bit vector with 4 `f32` lanes. -+); -+impl_m!([m32; 4]: m32x4 | i32 | test_v128 | x0, x1, x2, x3 | -+ From: m8x4, m16x4, m64x4 | -+ /// A 128-bit vector mask with 4 `m32` lanes. -+); -+ -+impl_i!([i64; 2]: i64x2, m64x2 | i64 | test_v128 | x0, x1 | -+ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2 | -+ /// A 128-bit vector with 2 `i64` lanes. -+); -+impl_u!([u64; 2]: u64x2, m64x2 | u64 | test_v128 | x0, x1 | -+ From: u8x2, u16x2, u32x2 | -+ /// A 128-bit vector with 2 `u64` lanes. -+); -+impl_f!([f64; 2]: f64x2, m64x2 | f64 | test_v128 | x0, x1 | -+ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, f32x2 | -+ /// A 128-bit vector with 2 `f64` lanes. -+); -+impl_m!([m64; 2]: m64x2 | i64 | test_v128 | x0, x1 | -+ From: m8x2, m16x2, m32x2, m128x2 | -+ /// A 128-bit vector mask with 2 `m64` lanes. -+); -+ -+impl_i!([i128; 1]: i128x1, m128x1 | i128 | test_v128 | x0 | -+ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, i64x1, u64x1 */ | // FIXME: unary small vector types -+ /// A 128-bit vector with 1 `i128` lane. -+); -+impl_u!([u128; 1]: u128x1, m128x1 | u128 | test_v128 | x0 | -+ From: /*u8x1, u16x1, u32x1, u64x1 */ | // FIXME: unary small vector types -+ /// A 128-bit vector with 1 `u128` lane. -+); -+impl_m!([m128; 1]: m128x1 | i128 | test_v128 | x0 | -+ From: /*m8x1, m16x1, m32x1, m64x1 */ | // FIXME: unary small vector types -+ /// A 128-bit vector mask with 1 `m128` lane. -+); -diff --git a/third_party/rust/packed_simd/src/v16.rs b/third_party/rust/packed_simd/src/v16.rs -new file mode 100644 -index 000000000000..67a3832d2530 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/v16.rs -@@ -0,0 +1,16 @@ -+//! 16-bit wide vector types -+ -+use crate::*; -+ -+impl_i!([i8; 2]: i8x2, m8x2 | i8 | test_v16 | x0, x1 | -+ From: | -+ /// A 16-bit vector with 2 `i8` lanes. 
-+); -+impl_u!([u8; 2]: u8x2, m8x2 | u8 | test_v16 | x0, x1 | -+ From: | -+ /// A 16-bit vector with 2 `u8` lanes. -+); -+impl_m!([m8; 2]: m8x2 | i8 | test_v16 | x0, x1 | -+ From: m16x2, m32x2, m64x2, m128x2 | -+ /// A 16-bit vector mask with 2 `m8` lanes. -+); -diff --git a/third_party/rust/packed_simd/src/v256.rs b/third_party/rust/packed_simd/src/v256.rs -new file mode 100644 -index 000000000000..6b59336f68b6 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/v256.rs -@@ -0,0 +1,86 @@ -+//! 256-bit wide vector types -+#![rustfmt::skip] -+ -+use crate::*; -+ -+impl_i!([i8; 32]: i8x32, m8x32 | i8 | test_v256 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, -+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | -+ From: | -+ /// A 256-bit vector with 32 `i8` lanes. -+); -+impl_u!([u8; 32]: u8x32, m8x32 | u8 | test_v256 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, -+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | -+ From: | -+ /// A 256-bit vector with 32 `u8` lanes. -+); -+impl_m!([m8; 32]: m8x32 | i8 | test_v256 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, -+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | -+ From: | -+ /// A 256-bit vector mask with 32 `m8` lanes. -+); -+ -+impl_i!([i16; 16]: i16x16, m16x16 | i16 | test_v256 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | -+ From: i8x16, u8x16 | -+ /// A 256-bit vector with 16 `i16` lanes. -+); -+impl_u!([u16; 16]: u16x16, m16x16 | u16 | test_v256 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | -+ From: u8x16 | -+ /// A 256-bit vector with 16 `u16` lanes. -+); -+impl_m!([m16; 16]: m16x16 | i16 | test_v256 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | -+ From: m8x16 | -+ /// A 256-bit vector mask with 16 `m16` lanes. 
-+); -+ -+impl_i!([i32; 8]: i32x8, m32x8 | i32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: i8x8, u8x8, i16x8, u16x8 | -+ /// A 256-bit vector with 8 `i32` lanes. -+); -+impl_u!([u32; 8]: u32x8, m32x8 | u32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: u8x8, u16x8 | -+ /// A 256-bit vector with 8 `u32` lanes. -+); -+impl_f!([f32; 8]: f32x8, m32x8 | f32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: i8x8, u8x8, i16x8, u16x8 | -+ /// A 256-bit vector with 8 `f32` lanes. -+); -+impl_m!([m32; 8]: m32x8 | i32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: m8x8, m16x8 | -+ /// A 256-bit vector mask with 8 `m32` lanes. -+); -+ -+impl_i!([i64; 4]: i64x4, m64x4 | i64 | test_v256 | x0, x1, x2, x3 | -+ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4 | -+ /// A 256-bit vector with 4 `i64` lanes. -+); -+impl_u!([u64; 4]: u64x4, m64x4 | u64 | test_v256 | x0, x1, x2, x3 | -+ From: u8x4, u16x4, u32x4 | -+ /// A 256-bit vector with 4 `u64` lanes. -+); -+impl_f!([f64; 4]: f64x4, m64x4 | f64 | test_v256 | x0, x1, x2, x3 | -+ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, f32x4 | -+ /// A 256-bit vector with 4 `f64` lanes. -+); -+impl_m!([m64; 4]: m64x4 | i64 | test_v256 | x0, x1, x2, x3 | -+ From: m8x4, m16x4, m32x4 | -+ /// A 256-bit vector mask with 4 `m64` lanes. -+); -+ -+impl_i!([i128; 2]: i128x2, m128x2 | i128 | test_v256 | x0, x1 | -+ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, i64x2, u64x2 | -+ /// A 256-bit vector with 2 `i128` lanes. -+); -+impl_u!([u128; 2]: u128x2, m128x2 | u128 | test_v256 | x0, x1 | -+ From: u8x2, u16x2, u32x2, u64x2 | -+ /// A 256-bit vector with 2 `u128` lanes. -+); -+impl_m!([m128; 2]: m128x2 | i128 | test_v256 | x0, x1 | -+ From: m8x2, m16x2, m32x2, m64x2 | -+ /// A 256-bit vector mask with 2 `m128` lanes. 
-+); -diff --git a/third_party/rust/packed_simd/src/v32.rs b/third_party/rust/packed_simd/src/v32.rs -new file mode 100644 -index 000000000000..09cef9bdd472 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/v32.rs -@@ -0,0 +1,29 @@ -+//! 32-bit wide vector types -+ -+use crate::*; -+ -+impl_i!([i8; 4]: i8x4, m8x4 | i8 | test_v32 | x0, x1, x2, x3 | -+ From: | -+ /// A 32-bit vector with 4 `i8` lanes. -+); -+impl_u!([u8; 4]: u8x4, m8x4 | u8 | test_v32 | x0, x1, x2, x3 | -+ From: | -+ /// A 32-bit vector with 4 `u8` lanes. -+); -+impl_m!([m8; 4]: m8x4 | i8 | test_v32 | x0, x1, x2, x3 | -+ From: m16x4, m32x4, m64x4 | -+ /// A 32-bit vector mask with 4 `m8` lanes. -+); -+ -+impl_i!([i16; 2]: i16x2, m16x2 | i16 | test_v32 | x0, x1 | -+ From: i8x2, u8x2 | -+ /// A 32-bit vector with 2 `i16` lanes. -+); -+impl_u!([u16; 2]: u16x2, m16x2 | u16 | test_v32 | x0, x1 | -+ From: u8x2 | -+ /// A 32-bit vector with 2 `u16` lanes. -+); -+impl_m!([m16; 2]: m16x2 | i16 | test_v32 | x0, x1 | -+ From: m8x2, m32x2, m64x2, m128x2 | -+ /// A 32-bit vector mask with 2 `m16` lanes. -+); -diff --git a/third_party/rust/packed_simd/src/v512.rs b/third_party/rust/packed_simd/src/v512.rs -new file mode 100644 -index 000000000000..b1714aded369 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/v512.rs -@@ -0,0 +1,99 @@ -+//! 512-bit wide vector types -+#![rustfmt::skip] -+ -+use crate::*; -+ -+impl_i!([i8; 64]: i8x64, m8x64 | i8 | test_v512 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, -+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, -+ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, -+ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | -+ From: | -+ /// A 512-bit vector with 64 `i8` lanes. 
-+); -+impl_u!([u8; 64]: u8x64, m8x64 | u8 | test_v512 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, -+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, -+ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, -+ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | -+ From: | -+ /// A 512-bit vector with 64 `u8` lanes. -+); -+impl_m!([m8; 64]: m8x64 | i8 | test_v512 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, -+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, -+ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, -+ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | -+ From: | -+ /// A 512-bit vector mask with 64 `m8` lanes. -+); -+ -+impl_i!([i16; 32]: i16x32, m16x32 | i16 | test_v512 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, -+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | -+ From: i8x32, u8x32 | -+ /// A 512-bit vector with 32 `i16` lanes. -+); -+impl_u!([u16; 32]: u16x32, m16x32 | u16 | test_v512 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, -+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | -+ From: u8x32 | -+ /// A 512-bit vector with 32 `u16` lanes. -+); -+impl_m!([m16; 32]: m16x32 | i16 | test_v512 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, -+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | -+ From: m8x32 | -+ /// A 512-bit vector mask with 32 `m16` lanes. -+); -+ -+impl_i!([i32; 16]: i32x16, m32x16 | i32 | test_v512 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | -+ From: i8x16, u8x16, i16x16, u16x16 | -+ /// A 512-bit vector with 16 `i32` lanes. 
-+); -+impl_u!([u32; 16]: u32x16, m32x16 | u32 | test_v512 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | -+ From: u8x16, u16x16 | -+ /// A 512-bit vector with 16 `u32` lanes. -+); -+impl_f!([f32; 16]: f32x16, m32x16 | f32 | test_v512 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | -+ From: i8x16, u8x16, i16x16, u16x16 | -+ /// A 512-bit vector with 16 `f32` lanes. -+); -+impl_m!([m32; 16]: m32x16 | i32 | test_v512 | -+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | -+ From: m8x16, m16x16 | -+ /// A 512-bit vector mask with 16 `m32` lanes. -+); -+ -+impl_i!([i64; 8]: i64x8, m64x8 | i64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8 | -+ /// A 512-bit vector with 8 `i64` lanes. -+); -+impl_u!([u64; 8]: u64x8, m64x8 | u64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: u8x8, u16x8, u32x8 | -+ /// A 512-bit vector with 8 `u64` lanes. -+); -+impl_f!([f64; 8]: f64x8, m64x8 | f64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8, f32x8 | -+ /// A 512-bit vector with 8 `f64` lanes. -+); -+impl_m!([m64; 8]: m64x8 | i64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: m8x8, m16x8, m32x8 | -+ /// A 512-bit vector mask with 8 `m64` lanes. -+); -+ -+impl_i!([i128; 4]: i128x4, m128x4 | i128 | test_v512 | x0, x1, x2, x3 | -+ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, i64x4, u64x4 | -+ /// A 512-bit vector with 4 `i128` lanes. -+); -+impl_u!([u128; 4]: u128x4, m128x4 | u128 | test_v512 | x0, x1, x2, x3 | -+ From: u8x4, u16x4, u32x4, u64x4 | -+ /// A 512-bit vector with 4 `u128` lanes. -+); -+impl_m!([m128; 4]: m128x4 | i128 | test_v512 | x0, x1, x2, x3 | -+ From: m8x4, m16x4, m32x4, m64x4 | -+ /// A 512-bit vector mask with 4 `m128` lanes. 
-+); -diff --git a/third_party/rust/packed_simd/src/v64.rs b/third_party/rust/packed_simd/src/v64.rs -new file mode 100644 -index 000000000000..1ee6219c040b ---- /dev/null -+++ b/third_party/rust/packed_simd/src/v64.rs -@@ -0,0 +1,66 @@ -+//! 64-bit wide vector types -+#![rustfmt::skip] -+ -+use super::*; -+ -+impl_i!([i8; 8]: i8x8, m8x8 | i8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: | -+ /// A 64-bit vector with 8 `i8` lanes. -+); -+impl_u!([u8; 8]: u8x8, m8x8 | u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: | -+ /// A 64-bit vector with 8 `u8` lanes. -+); -+impl_m!([m8; 8]: m8x8 | i8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: m16x8, m32x8 | -+ /// A 64-bit vector mask with 8 `m8` lanes. -+); -+ -+impl_i!([i16; 4]: i16x4, m16x4 | i16 | test_v64 | x0, x1, x2, x3 | -+ From: i8x4, u8x4 | -+ /// A 64-bit vector with 4 `i16` lanes. -+); -+impl_u!([u16; 4]: u16x4, m16x4 | u16 | test_v64 | x0, x1, x2, x3 | -+ From: u8x4 | -+ /// A 64-bit vector with 4 `u16` lanes. -+); -+impl_m!([m16; 4]: m16x4 | i16 | test_v64 | x0, x1, x2, x3 | -+ From: m8x4, m32x4, m64x4 | -+ /// A 64-bit vector mask with 4 `m16` lanes. -+); -+ -+impl_i!([i32; 2]: i32x2, m32x2 | i32 | test_v64 | x0, x1 | -+ From: i8x2, u8x2, i16x2, u16x2 | -+ /// A 64-bit vector with 2 `i32` lanes. -+); -+impl_u!([u32; 2]: u32x2, m32x2 | u32 | test_v64 | x0, x1 | -+ From: u8x2, u16x2 | -+ /// A 64-bit vector with 2 `u32` lanes. -+); -+impl_m!([m32; 2]: m32x2 | i32 | test_v64 | x0, x1 | -+ From: m8x2, m16x2, m64x2, m128x2 | -+ /// A 64-bit vector mask with 2 `m32` lanes. -+); -+impl_f!([f32; 2]: f32x2, m32x2 | f32 | test_v64 | x0, x1 | -+ From: i8x2, u8x2, i16x2, u16x2 | -+ /// A 64-bit vector with 2 `f32` lanes. -+); -+ -+/* -+impl_i!([i64; 1]: i64x1, m64x1 | i64 | test_v64 | x0 | -+ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1*/ | // FIXME: primitive to vector conversion -+ /// A 64-bit vector with 1 `i64` lanes. 
-+); -+impl_u!([u64; 1]: u64x1, m64x1 | u64 | test_v64 | x0 | -+ From: /*u8x1, u16x1, u32x1*/ | // FIXME: primitive to vector conversion -+ /// A 64-bit vector with 1 `u64` lanes. -+); -+impl_m!([m64; 1]: m64x1 | i64 | test_v64 | x0 | -+ From: /*m8x1, m16x1, m32x1, */ m128x1 | // FIXME: unary small vector types -+ /// A 64-bit vector mask with 1 `m64` lanes. -+); -+impl_f!([f64; 1]: f64x1, m64x1 | f64 | test_v64 | x0 | -+ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, f32x1*/ | // FIXME: unary small vector types -+ /// A 64-bit vector with 1 `f64` lanes. -+); -+*/ -diff --git a/third_party/rust/packed_simd/src/vPtr.rs b/third_party/rust/packed_simd/src/vPtr.rs -new file mode 100644 -index 000000000000..fe9fb28ffa89 ---- /dev/null -+++ b/third_party/rust/packed_simd/src/vPtr.rs -@@ -0,0 +1,34 @@ -+//! Vectors of pointers -+#![rustfmt::skip] -+ -+use crate::*; -+ -+impl_const_p!( -+ [*const T; 2]: cptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: | -+ /// A vector with 2 `*const T` lanes -+); -+ -+impl_mut_p!( -+ [*mut T; 2]: mptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: | -+ /// A vector with 2 `*mut T` lanes -+); -+ -+impl_const_p!( -+ [*const T; 4]: cptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: | -+ /// A vector with 4 `*const T` lanes -+); -+ -+impl_mut_p!( -+ [*mut T; 4]: mptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: | -+ /// A vector with 4 `*mut T` lanes -+); -+ -+impl_const_p!( -+ [*const T; 8]: cptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | -+ /// A vector with 8 `*const T` lanes -+); -+ -+impl_mut_p!( -+ [*mut T; 8]: mptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | -+ /// A vector with 8 `*mut T` lanes -+); -diff --git a/third_party/rust/packed_simd/src/vSize.rs b/third_party/rust/packed_simd/src/vSize.rs -new file mode 100644 -index 000000000000..5594323372b4 ---- /dev/null -+++ 
b/third_party/rust/packed_simd/src/vSize.rs -@@ -0,0 +1,53 @@ -+//! Vectors with pointer-sized elements -+ -+use crate::codegen::pointer_sized_int::{isize_, usize_}; -+use crate::*; -+ -+impl_i!([isize; 2]: isizex2, msizex2 | isize_ | test_v128 | -+ x0, x1| -+ From: | -+ /// A vector with 2 `isize` lanes. -+); -+ -+impl_u!([usize; 2]: usizex2, msizex2 | usize_ | test_v128 | -+ x0, x1| -+ From: | -+ /// A vector with 2 `usize` lanes. -+); -+impl_m!([msize; 2]: msizex2 | isize_ | test_v128 | -+ x0, x1 | -+ From: | -+ /// A vector mask with 2 `msize` lanes. -+); -+ -+impl_i!([isize; 4]: isizex4, msizex4 | isize_ | test_v256 | -+ x0, x1, x2, x3 | -+ From: | -+ /// A vector with 4 `isize` lanes. -+); -+impl_u!([usize; 4]: usizex4, msizex4 | usize_ | test_v256 | -+ x0, x1, x2, x3| -+ From: | -+ /// A vector with 4 `usize` lanes. -+); -+impl_m!([msize; 4]: msizex4 | isize_ | test_v256 | -+ x0, x1, x2, x3 | -+ From: | -+ /// A vector mask with 4 `msize` lanes. -+); -+ -+impl_i!([isize; 8]: isizex8, msizex8 | isize_ | test_v512 | -+ x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: | -+ /// A vector with 4 `isize` lanes. -+); -+impl_u!([usize; 8]: usizex8, msizex8 | usize_ | test_v512 | -+ x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: | -+ /// A vector with 8 `usize` lanes. -+); -+impl_m!([msize; 8]: msizex8 | isize_ | test_v512 | -+ x0, x1, x2, x3, x4, x5, x6, x7 | -+ From: | -+ /// A vector mask with 8 `msize` lanes. 
-+); -diff --git a/third_party/rust/packed_simd/tests/endianness.rs b/third_party/rust/packed_simd/tests/endianness.rs -new file mode 100644 -index 000000000000..1e6b4f354301 ---- /dev/null -+++ b/third_party/rust/packed_simd/tests/endianness.rs -@@ -0,0 +1,262 @@ -+#[cfg(target_arch = "wasm32")] -+use wasm_bindgen_test::*; -+ -+use packed_simd::*; -+use std::{mem, slice}; -+ -+#[cfg_attr(not(target_arch = "wasm32"), test)] -+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+fn endian_indexing() { -+ let v = i32x4::new(0, 1, 2, 3); -+ assert_eq!(v.extract(0), 0); -+ assert_eq!(v.extract(1), 1); -+ assert_eq!(v.extract(2), 2); -+ assert_eq!(v.extract(3), 3); -+} -+ -+#[cfg_attr(not(target_arch = "wasm32"), test)] -+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+fn endian_bitcasts() { -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ let x = i8x16::new( -+ 0, 1, 2, 3, 4, 5, 6, 7, -+ 8, 9, 10, 11, 12, 13, 14, 15, -+ ); -+ let t: i16x8 = unsafe { mem::transmute(x) }; -+ let e: i16x8 = if cfg!(target_endian = "little") { -+ i16x8::new(256, 770, 1284, 1798, 2312, 2826, 3340, 3854) -+ } else { -+ i16x8::new(1, 515, 1029, 1543, 2057, 2571, 3085, 3599) -+ }; -+ assert_eq!(t, e); -+} -+ -+#[cfg_attr(not(target_arch = "wasm32"), test)] -+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+fn endian_casts() { -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ let x = i8x16::new( -+ 0, 1, 2, 3, 4, 5, 6, 7, -+ 8, 9, 10, 11, 12, 13, 14, 15, -+ ); -+ let t: i16x16 = x.into(); // simd_cast -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ let e = i16x16::new( -+ 0, 1, 2, 3, 4, 5, 6, 7, -+ 8, 9, 10, 11, 12, 13, 14, 15, -+ ); -+ assert_eq!(t, e); -+} -+ -+#[cfg_attr(not(target_arch = "wasm32"), test)] -+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+fn endian_load_and_stores() { -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ let x = i8x16::new( -+ 0, 1, 2, 3, 4, 5, 6, 7, -+ 8, 9, 10, 11, 12, 13, 14, 15, -+ ); -+ let mut y: [i16; 8] = [0; 8]; -+ x.write_to_slice_unaligned(unsafe { -+ 
slice::from_raw_parts_mut(&mut y as *mut _ as *mut i8, 16) -+ }); -+ -+ let e: [i16; 8] = if cfg!(target_endian = "little") { -+ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] -+ } else { -+ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] -+ }; -+ assert_eq!(y, e); -+ -+ let z = i8x16::from_slice_unaligned(unsafe { -+ slice::from_raw_parts(&y as *const _ as *const i8, 16) -+ }); -+ assert_eq!(z, x); -+} -+ -+#[cfg_attr(not(target_arch = "wasm32"), test)] -+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -+fn endian_array_union() { -+ union A { -+ data: [f32; 4], -+ vec: f32x4, -+ } -+ let x: [f32; 4] = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data }; -+ assert_eq!(x[0], 0_f32); -+ assert_eq!(x[1], 1_f32); -+ assert_eq!(x[2], 2_f32); -+ assert_eq!(x[3], 3_f32); -+ let y: f32x4 = unsafe { A { data: [3., 2., 1., 0.] }.vec }; -+ assert_eq!(y, f32x4::new(3., 2., 1., 0.)); -+ -+ union B { -+ data: [i8; 16], -+ vec: i8x16, -+ } -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ let x = i8x16::new( -+ 0, 1, 2, 3, 4, 5, 6, 7, -+ 8, 9, 10, 11, 12, 13, 14, 15, -+ ); -+ let x: [i8; 16] = unsafe { B { vec: x }.data }; -+ -+ for i in 0..16 { -+ assert_eq!(x[i], i as i8); -+ } -+ -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ let y = [ -+ 15, 14, 13, 12, 11, 19, 9, 8, -+ 7, 6, 5, 4, 3, 2, 1, 0 -+ ]; -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ let e = i8x16::new( -+ 15, 14, 13, 12, 11, 19, 9, 8, -+ 7, 6, 5, 4, 3, 2, 1, 0 -+ ); -+ let z = unsafe { B { data: y }.vec }; -+ assert_eq!(z, e); -+ -+ union C { -+ data: [i16; 8], -+ vec: i8x16, -+ } -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ let x = i8x16::new( -+ 0, 1, 2, 3, 4, 5, 6, 7, -+ 8, 9, 10, 11, 12, 13, 14, 15, -+ ); -+ let x: [i16; 8] = unsafe { C { vec: x }.data }; -+ -+ let e: [i16; 8] = if cfg!(target_endian = "little") { -+ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] -+ } else { -+ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] -+ }; -+ assert_eq!(x, e); -+} -+ -+#[cfg_attr(not(target_arch = "wasm32"), test)] -+#[cfg_attr(target_arch 
= "wasm32", wasm_bindgen_test)] -+fn endian_tuple_access() { -+ type F32x4T = (f32, f32, f32, f32); -+ union A { -+ data: F32x4T, -+ vec: f32x4, -+ } -+ let x: F32x4T = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data }; -+ assert_eq!(x.0, 0_f32); -+ assert_eq!(x.1, 1_f32); -+ assert_eq!(x.2, 2_f32); -+ assert_eq!(x.3, 3_f32); -+ let y: f32x4 = unsafe { A { data: (3., 2., 1., 0.) }.vec }; -+ assert_eq!(y, f32x4::new(3., 2., 1., 0.)); -+ -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ type I8x16T = (i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8); -+ union B { -+ data: I8x16T, -+ vec: i8x16, -+ } -+ -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ let x = i8x16::new( -+ 0, 1, 2, 3, 4, 5, 6, 7, -+ 8, 9, 10, 11, 12, 13, 14, 15, -+ ); -+ let x: I8x16T = unsafe { B { vec: x }.data }; -+ -+ assert_eq!(x.0, 0); -+ assert_eq!(x.1, 1); -+ assert_eq!(x.2, 2); -+ assert_eq!(x.3, 3); -+ assert_eq!(x.4, 4); -+ assert_eq!(x.5, 5); -+ assert_eq!(x.6, 6); -+ assert_eq!(x.7, 7); -+ assert_eq!(x.8, 8); -+ assert_eq!(x.9, 9); -+ assert_eq!(x.10, 10); -+ assert_eq!(x.11, 11); -+ assert_eq!(x.12, 12); -+ assert_eq!(x.13, 13); -+ assert_eq!(x.14, 14); -+ assert_eq!(x.15, 15); -+ -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ let y = ( -+ 15, 14, 13, 12, 11, 10, 9, 8, -+ 7, 6, 5, 4, 3, 2, 1, 0 -+ ); -+ let z: i8x16 = unsafe { B { data: y }.vec }; -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ let e = i8x16::new( -+ 15, 14, 13, 12, 11, 10, 9, 8, -+ 7, 6, 5, 4, 3, 2, 1, 0 -+ ); -+ assert_eq!(e, z); -+ -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ type I16x8T = (i16, i16, i16, i16, i16, i16, i16, i16); -+ union C { -+ data: I16x8T, -+ vec: i8x16, -+ } -+ -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ let x = i8x16::new( -+ 0, 1, 2, 3, 4, 5, 6, 7, -+ 8, 9, 10, 11, 12, 13, 14, 15, -+ ); -+ let x: I16x8T = unsafe { C { vec: x }.data }; -+ -+ let e: [i16; 8] = if cfg!(target_endian = "little") { -+ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] -+ } else { -+ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] -+ }; -+ 
assert_eq!(x.0, e[0]); -+ assert_eq!(x.1, e[1]); -+ assert_eq!(x.2, e[2]); -+ assert_eq!(x.3, e[3]); -+ assert_eq!(x.4, e[4]); -+ assert_eq!(x.5, e[5]); -+ assert_eq!(x.6, e[6]); -+ assert_eq!(x.7, e[7]); -+ -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ #[repr(C)] -+ #[derive(Copy ,Clone)] -+ pub struct Tup(pub i8, pub i8, pub i16, pub i8, pub i8, pub i16, -+ pub i8, pub i8, pub i16, pub i8, pub i8, pub i16); -+ -+ union D { -+ data: Tup, -+ vec: i8x16, -+ } -+ -+ #[cfg_attr(rustfmt, rustfmt_skip)] -+ let x = i8x16::new( -+ 0, 1, 2, 3, 4, 5, 6, 7, -+ 8, 9, 10, 11, 12, 13, 14, 15, -+ ); -+ let x: Tup = unsafe { D { vec: x }.data }; -+ -+ let e: [i16; 12] = if cfg!(target_endian = "little") { -+ [0, 1, 770, 4, 5, 1798, 8, 9, 2826, 12, 13, 3854] -+ } else { -+ [0, 1, 515, 4, 5, 1543, 8, 9, 2571, 12, 13, 3599] -+ }; -+ assert_eq!(x.0 as i16, e[0]); -+ assert_eq!(x.1 as i16, e[1]); -+ assert_eq!(x.2 as i16, e[2]); -+ assert_eq!(x.3 as i16, e[3]); -+ assert_eq!(x.4 as i16, e[4]); -+ assert_eq!(x.5 as i16, e[5]); -+ assert_eq!(x.6 as i16, e[6]); -+ assert_eq!(x.7 as i16, e[7]); -+ assert_eq!(x.8 as i16, e[8]); -+ assert_eq!(x.9 as i16, e[9]); -+ assert_eq!(x.10 as i16, e[10]); -+ assert_eq!(x.11 as i16, e[11]); -+} -diff --git a/third_party/rust/simd/.cargo-checksum.json b/third_party/rust/simd/.cargo-checksum.json -deleted file mode 100644 -index 5e8c154cda72..000000000000 ---- a/third_party/rust/simd/.cargo-checksum.json -+++ /dev/null -@@ -1 +0,0 @@ 
--{"files":{"Cargo.toml":"0c7a480c62d7b42604098fa1dd6453be79629112569c494efa75d7fd0998fd69","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6d3a9431e65e69c73a8923e6517b889d17549b23db406b9ec027710d16af701f","README.md":"f426ca32bb44fee39d83d51e481fe6b2640d4b78cb097c952cd75800b886f2fd","benches/mandelbrot.rs":"051b5199e66bca6cf7774e9024915fd4e1349ab39726a10a14e06b60d65d87a4","benches/matrix.rs":"048a21dacdb62365e0105d00d2c8cd6bd2396ac81134f2bff7eb4f7d095fb735","build.rs":"69c9c9029ca969a4bb3f11429bc1424fa75af46143eb0d853b4db3a512859b32","examples/axpy.rs":"4307626045d64ec08361c97c9c72c5dc8d361bdc88f64453b97ac0212041a1b2","examples/convert.rs":"8e658fde050f8a0d8b84ad7570446b10fcf544afbd551b940ca340474f324840","examples/dot-product.rs":"6fe2e007c147af5353804173a593c5b9d57dbccec156e1da37e9e32537363f91","examples/fannkuch-redux-nosimd.rs":"7b2fbde35e8666929d14d67328471cb0483d038a5325232f8db148b30865312b","examples/fannkuch-redux.rs":"ea21fdbd2274488a62cc984acad6e0b65d52f24fb4ff63b7057a3a667e9c8aae","examples/mandelbrot.rs":"71be242543c1e487145d7f16341c05d05d86109de4d9e94c5d6bc9a9c6ed9766","examples/matrix-inverse.rs":"93dbc55c66a72e5f7bc730072f35682523fa20dd362755d8443ad6982143cb5d","examples/nbody-nosimd.rs":"9cf46ea02e266c20f811318f1c5856d5afb9575b2d48d552fbd978f5c1856bdb","examples/nbody.rs":"a864311affab262024479d6348ff51af43d809e9ad332ec30ea4aacceaa2eae1","examples/ops.rs":"b08ea83583df71d0052895d677320a9888da5b6729c9b70636d31ede5128bb7f","examples/spectral-norm-nosimd.rs":"ffc8512ecde779078ea467f38f423a0ea623c63da7078193f9dd370200773f79","examples/spectral-norm.rs":"edb09c9d477f83939098cfb77a27cc298bc7a0c8a8e29cece0cccae0d70d890e","src/aarch64/mod.rs":"83f52775364c98de0cecb7e1509530c18972e932469f5f1522aa24a735d0fa37","src/aarch64/neon.rs":"3c05ea43b7261b9af9c0d904b37de01c2ba99caedcb464700f16617b672965a1","src/arm/mod.rs":"dcdd90bc0b39abaf86a0c8946d442b16313563fbae1ff03248628275c74d8617","src/arm/neon.rs":"71d0bb6dac
5f58599bb825449701a05cf32f6eca1918e80d060b746e69751c37","src/common.rs":"c5a7b937c5cd8c3bccf0fb20d5d77770c0d9b0dd9fa06a661c6f2ddf118e65c0","src/lib.rs":"a24a207e65468de2189297380747e2f2f33ec2317f4b83f0665d34b1c09feb08","src/sixty_four.rs":"d168776d02acf943bda8044b24e644b7a9584197a223eba1a7c3024b205dc87d","src/v256.rs":"34bfde3676e23f6925db5d0408ae838e3aab7706128fd7c33e855b8579c69318","src/x86/avx.rs":"efcf2120a904a89b0adf2d3d3bdd0ca17df2ec058410af23fb7e81915873f808","src/x86/avx2.rs":"3bcb3f391ad5f16f0a6da0bc1301329beb478ad6265bd3b2c9c124fc2e6198e5","src/x86/mod.rs":"0acc5a5e2672e2a0fddc11065663be8b8fa2da87320ea291fa86ff8c2f33edf5","src/x86/sse2.rs":"8807fb04bbfb404e17fcacf1e21d22616f8b377540a227b1fd03c121879122dd","src/x86/sse3.rs":"9bd01a4f08069ca4f445952e744d651efe887e3835b18872e757375f0d053bd2","src/x86/sse4_1.rs":"9ceb80dd70a7e7dfeef508cb935e1a2637175bc87a3b090f5dea691ff6aa0516","src/x86/sse4_2.rs":"c59321aed8decdce4d0d8570cff46aed02e1a8265647ef7702e9b180fc581254","src/x86/ssse3.rs":"2290f0269bae316b8e0491495645ee38a9bd73525c8572759c1328341c3bdb4c"},"package":"0048b17eb9577ac545c61d85c3559b41dfb4cbea41c9bd9ca6a4f73ff05fda84"} -\ No newline at end of file -diff --git a/third_party/rust/simd/Cargo.toml b/third_party/rust/simd/Cargo.toml -deleted file mode 100644 -index 30279b93556c..000000000000 ---- a/third_party/rust/simd/Cargo.toml -+++ /dev/null -@@ -1,37 +0,0 @@ --# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO --# --# When uploading crates to the registry Cargo will automatically --# "normalize" Cargo.toml files for maximal compatibility --# with all versions of Cargo and also rewrite `path` dependencies --# to registry (e.g. crates.io) dependencies --# --# If you believe there's an error in this file please file an --# issue against the rust-lang/cargo repository. 
If you're --# editing this file be aware that the upstream Cargo.toml --# will likely look very different (and much more reasonable) -- --[package] --name = "simd" --version = "0.2.3" --authors = ["Huon Wilson "] --description = "`simd` offers limited cross-platform access to SIMD instructions on\nCPUs, as well as raw interfaces to platform-specific instructions.\n(To be obsoleted by the `std::simd` implementation RFC 2366.)\n" --documentation = "https://docs.rs/simd/" --readme = "README.md" --keywords = ["simd", "data-parallel"] --license = "MIT/Apache-2.0" --repository = "https://github.com/hsivonen/simd" --[package.metadata.docs.rs] --features = ["doc"] --[dependencies.serde] --version = "1.0" --optional = true -- --[dependencies.serde_derive] --version = "1.0" --optional = true --[dev-dependencies.cfg-if] --version = "0.1" -- --[features] --doc = [] --with-serde = ["serde", "serde_derive"] -diff --git a/third_party/rust/simd/README.md b/third_party/rust/simd/README.md -deleted file mode 100644 -index 1c34f49bcd91..000000000000 ---- a/third_party/rust/simd/README.md -+++ /dev/null -@@ -1,11 +0,0 @@ --# `simd` -- --[![Build Status](https://travis-ci.org/hsivonen/simd.svg?branch=master)](https://travis-ci.org/hsivonen/simd) --[![crates.io](https://meritbadge.herokuapp.com/simd)](https://crates.io/crates/simd) --[![docs.rs](https://docs.rs/simd/badge.svg)](https://docs.rs/simd/) -- --`simd` offers a basic interface to the SIMD functionality of CPUs. (Note: This crate fails to build unless the target is aarch64, x86_64, i686 (i.e. SSE2 enabled; not i586) or an ARMv7 target (thumb or not) with NEON enabled.) -- --This crate is expected to become _obsolete_ once the implementation of [RFC 2366](https://github.com/rust-lang/rfcs/pull/2366) lands in the standard library. 
-- --[Documentation](https://docs.rs/simd) -diff --git a/third_party/rust/simd/benches/mandelbrot.rs b/third_party/rust/simd/benches/mandelbrot.rs -deleted file mode 100755 -index 61061a4a301f..000000000000 ---- a/third_party/rust/simd/benches/mandelbrot.rs -+++ /dev/null -@@ -1,117 +0,0 @@ --#![feature(test)] --#![feature(cfg_target_feature)] -- --extern crate simd; --extern crate test; -- --use test::black_box as bb; --use test::Bencher as B; --use simd::{f32x4, u32x4}; --#[cfg(any(target_feature = "avx", target_feature = "avx2"))] --use simd::x86::avx::{f32x8, u32x8}; -- --fn naive(c_x: f32, c_y: f32, max_iter: u32) -> u32 { -- let mut x = c_x; -- let mut y = c_y; -- let mut count = 0; -- while count < max_iter { -- let xy = x * y; -- let xx = x * x; -- let yy = y * y; -- let sum = xx + yy; -- if sum > 4.0 { -- break -- } -- count += 1; -- x = xx - yy + c_x; -- y = xy * 2.0 + c_y; -- } -- count --} -- --fn simd4(c_x: f32x4, c_y: f32x4, max_iter: u32) -> u32x4 { -- let mut x = c_x; -- let mut y = c_y; -- -- let mut count = u32x4::splat(0); -- for _ in 0..max_iter as usize { -- let xy = x * y; -- let xx = x * x; -- let yy = y * y; -- let sum = xx + yy; -- let mask = sum.lt(f32x4::splat(4.0)); -- -- if !mask.any() { break } -- count = count + mask.to_i().select(u32x4::splat(1), u32x4::splat(0)); -- -- x = xx - yy + c_x; -- y = xy + xy + c_y; -- } -- count --} -- --#[cfg(target_feature = "avx")] --fn simd8(c_x: f32x8, c_y: f32x8, max_iter: u32) -> u32x8 { -- let mut x = c_x; -- let mut y = c_y; -- -- let mut count = u32x8::splat(0); -- for _ in 0..max_iter as usize { -- let xy = x * y; -- let xx = x * x; -- let yy = y * y; -- let sum = xx + yy; -- let mask = sum.lt(f32x8::splat(4.0)); -- -- if !mask.any() { break } -- count = count + mask.to_i().select(u32x8::splat(1), u32x8::splat(0)); -- -- x = xx - yy + c_x; -- y = xy + xy + c_y; -- } -- count --} -- --const SCALE: f32 = 3.0 / 100.0; --const N: u32 = 100; --#[bench] --fn mandel_naive(b: &mut B) { -- b.iter(|| { 
-- for j in 0..100 { -- let y = -1.5 + (j as f32) * SCALE; -- for i in 0..100 { -- let x = -2.2 + (i as f32) * SCALE; -- bb(naive(x, y, N)); -- } -- } -- }) --} --#[bench] --fn mandel_simd4(b: &mut B) { -- let tweak = u32x4::new(0, 1, 2, 3); -- b.iter(|| { -- for j in 0..100 { -- let y = f32x4::splat(-1.5) + f32x4::splat(SCALE) * u32x4::splat(j).to_f32(); -- for i in 0..25 { -- let i = u32x4::splat(i * 4) + tweak; -- let x = f32x4::splat(-2.2) + f32x4::splat(SCALE) * i.to_f32(); -- bb(simd4(x, y, N)); -- } -- } -- }) --} --#[cfg(any(target_feature = "avx", target_feature = "avx2"))] --#[bench] --fn mandel_simd8(b: &mut B) { -- let tweak = u32x8::new(0, 1, 2, 3, 4, 5, 6, 7); -- b.iter(|| { -- for j in 0..100 { -- let y = f32x8::splat(-1.5) + f32x8::splat(SCALE) * u32x8::splat(j).to_f32(); -- for i in 0..13 { // 100 not divisible by 8 :( -- let i = u32x8::splat(i * 8) + tweak; -- let x = f32x8::splat(-2.2) + f32x8::splat(SCALE) * i.to_f32(); -- bb(simd8(x, y, N)); -- } -- } -- }) --} -diff --git a/third_party/rust/simd/benches/matrix.rs b/third_party/rust/simd/benches/matrix.rs -deleted file mode 100755 -index 36aa88237492..000000000000 ---- a/third_party/rust/simd/benches/matrix.rs -+++ /dev/null -@@ -1,485 +0,0 @@ --#![feature(test)] --#![feature(cfg_target_feature)] --extern crate test; --extern crate simd; -- --use test::black_box as bb; --use test::Bencher as B; --use simd::f32x4; --#[cfg(target_feature = "avx")] --use simd::x86::avx::{f32x8, f64x4}; --// #[cfg(target_feature = "avx2")] --// use simd::x86::avx2::Avx2F32x8; -- -- --#[bench] --fn multiply_naive(b: &mut B) { -- let x = [[1.0_f32; 4]; 4]; -- let y = [[2.0; 4]; 4]; -- b.iter(|| { -- for _ in 0..100 { -- let (x, y) = bb((&x, &y)); -- -- bb(&[[x[0][0] * y[0][0] + x[1][0] * y[0][1] + x[2][0] * y[0][2] + x[3][0] * y[0][3], -- x[0][1] * y[0][0] + x[1][1] * y[0][1] + x[2][1] * y[0][2] + x[3][1] * y[0][3], -- x[0][2] * y[0][0] + x[1][2] * y[0][1] + x[2][2] * y[0][2] + x[3][2] * y[0][3], -- x[0][3] * y[0][0] 
+ x[1][3] * y[0][1] + x[2][3] * y[0][2] + x[3][3] * y[0][3]], -- [x[0][0] * y[1][0] + x[1][0] * y[1][1] + x[2][0] * y[1][2] + x[3][0] * y[1][3], -- x[0][1] * y[1][0] + x[1][1] * y[1][1] + x[2][1] * y[1][2] + x[3][1] * y[1][3], -- x[0][2] * y[1][0] + x[1][2] * y[1][1] + x[2][2] * y[1][2] + x[3][2] * y[1][3], -- x[0][3] * y[1][0] + x[1][3] * y[1][1] + x[2][3] * y[1][2] + x[3][3] * y[1][3]], -- [x[0][0] * y[2][0] + x[1][0] * y[2][1] + x[2][0] * y[2][2] + x[3][0] * y[2][3], -- x[0][1] * y[2][0] + x[1][1] * y[2][1] + x[2][1] * y[2][2] + x[3][1] * y[2][3], -- x[0][2] * y[2][0] + x[1][2] * y[2][1] + x[2][2] * y[2][2] + x[3][2] * y[2][3], -- x[0][3] * y[2][0] + x[1][3] * y[2][1] + x[2][3] * y[2][2] + x[3][3] * y[2][3]], -- [x[0][0] * y[3][0] + x[1][0] * y[3][1] + x[2][0] * y[3][2] + x[3][0] * y[3][3], -- x[0][1] * y[3][0] + x[1][1] * y[3][1] + x[2][1] * y[3][2] + x[3][1] * y[3][3], -- x[0][2] * y[3][0] + x[1][2] * y[3][1] + x[2][2] * y[3][2] + x[3][2] * y[3][3], -- x[0][3] * y[3][0] + x[1][3] * y[3][1] + x[2][3] * y[3][2] + x[3][3] * y[3][3]], -- ]); -- } -- }) --} -- --#[bench] --fn multiply_simd4_32(b: &mut B) { -- let x = [f32x4::splat(1.0_f32); 4]; -- let y = [f32x4::splat(2.0); 4]; -- b.iter(|| { -- for _ in 0..100 { -- let (x, y) = bb((&x, &y)); -- -- let y0 = y[0]; -- let y1 = y[1]; -- let y2 = y[2]; -- let y3 = y[3]; -- bb(&[f32x4::splat(y0.extract(0)) * x[0] + -- f32x4::splat(y0.extract(1)) * x[1] + -- f32x4::splat(y0.extract(2)) * x[2] + -- f32x4::splat(y0.extract(3)) * x[3], -- f32x4::splat(y1.extract(0)) * x[0] + -- f32x4::splat(y1.extract(1)) * x[1] + -- f32x4::splat(y1.extract(2)) * x[2] + -- f32x4::splat(y1.extract(3)) * x[3], -- f32x4::splat(y2.extract(0)) * x[0] + -- f32x4::splat(y2.extract(1)) * x[1] + -- f32x4::splat(y2.extract(2)) * x[2] + -- f32x4::splat(y2.extract(3)) * x[3], -- f32x4::splat(y3.extract(0)) * x[0] + -- f32x4::splat(y3.extract(1)) * x[1] + -- f32x4::splat(y3.extract(2)) * x[2] + -- f32x4::splat(y3.extract(3)) * x[3], -- ]); -- } -- }) 
--} -- --#[cfg(target_feature = "avx")] --#[bench] --fn multiply_simd4_64(b: &mut B) { -- let x = [f64x4::splat(1.0_f64); 4]; -- let y = [f64x4::splat(2.0); 4]; -- b.iter(|| { -- for _ in 0..100 { -- let (x, y) = bb((&x, &y)); -- -- let y0 = y[0]; -- let y1 = y[1]; -- let y2 = y[2]; -- let y3 = y[3]; -- bb(&[f64x4::splat(y0.extract(0)) * x[0] + -- f64x4::splat(y0.extract(1)) * x[1] + -- f64x4::splat(y0.extract(2)) * x[2] + -- f64x4::splat(y0.extract(3)) * x[3], -- f64x4::splat(y1.extract(0)) * x[0] + -- f64x4::splat(y1.extract(1)) * x[1] + -- f64x4::splat(y1.extract(2)) * x[2] + -- f64x4::splat(y1.extract(3)) * x[3], -- f64x4::splat(y2.extract(0)) * x[0] + -- f64x4::splat(y2.extract(1)) * x[1] + -- f64x4::splat(y2.extract(2)) * x[2] + -- f64x4::splat(y2.extract(3)) * x[3], -- f64x4::splat(y3.extract(0)) * x[0] + -- f64x4::splat(y3.extract(1)) * x[1] + -- f64x4::splat(y3.extract(2)) * x[2] + -- f64x4::splat(y3.extract(3)) * x[3], -- ]); -- } -- }) --} -- --#[bench] --fn inverse_naive(b: &mut B) { -- let mut x = [[0_f32; 4]; 4]; -- for i in 0..4 { x[i][i] = 1.0 } -- -- b.iter(|| { -- for _ in 0..100 { -- let x = bb(&x); -- -- let mut t = [[0_f32; 4]; 4]; -- for i in 0..4 { -- t[0][i] = x[i][0]; -- t[1][i] = x[i][1]; -- t[2][i] = x[i][2]; -- t[3][i] = x[i][3]; -- } -- -- let _0 = t[2][2] * t[3][3]; -- let _1 = t[2][3] * t[3][2]; -- let _2 = t[2][1] * t[3][3]; -- let _3 = t[2][3] * t[3][1]; -- let _4 = t[2][1] * t[3][2]; -- let _5 = t[2][2] * t[3][1]; -- let _6 = t[2][0] * t[3][3]; -- let _7 = t[2][3] * t[3][0]; -- let _8 = t[2][0] * t[3][2]; -- let _9 = t[2][2] * t[3][0]; -- let _10 = t[2][0] * t[3][1]; -- let _11 = t[2][1] * t[3][0]; -- -- let d00 = _0 * t[1][1] + _3 * t[1][2] + _4 * t[1][3] - -- (_1 * t[1][1] + _2 * t[1][2] + _5 * t[1][3]); -- let d01 = _1 * t[1][0] + _6 * t[1][2] + _9 * t[1][3] - -- (_0 * t[1][0] + _7 * t[1][2] + _8 * t[1][3]); -- let d02 = _2 * t[1][0] + _7 * t[1][1] + _10 * t[1][3] - -- (_3 * t[1][0] + _6 * t[1][1] + _11 * t[1][3]); -- let d03 = 
_5 * t[1][0] + _8 * t[1][1] + _11 * t[1][2] - -- (_4 * t[1][0] + _9 * t[1][1] + _10 * t[1][2]); -- let d10 = _1 * t[0][1] + _2 * t[0][2] + _5 * t[0][3] - -- (_0 * t[0][1] + _3 * t[0][2] + _4 * t[0][3]); -- let d11 = _0 * t[0][0] + _7 * t[0][2] + _8 * t[0][3] - -- (_1 * t[0][0] + _6 * t[0][2] + _9 * t[0][3]); -- let d12 = _3 * t[0][0] + _6 * t[0][1] + _11 * t[0][3] - -- (_2 * t[0][0] + _7 * t[0][1] + _10 * t[0][3]); -- let d13 = _4 * t[0][0] + _9 * t[0][1] + _10 * t[0][2] - -- (_5 * t[0][0] + _8 * t[0][1] + _11 * t[0][2]); -- -- let _0 = t[0][2] * t[1][3]; -- let _1 = t[0][3] * t[1][2]; -- let _2 = t[0][1] * t[1][3]; -- let _3 = t[0][3] * t[1][1]; -- let _4 = t[0][1] * t[1][2]; -- let _5 = t[0][2] * t[1][1]; -- let _6 = t[0][0] * t[1][3]; -- let _7 = t[0][3] * t[1][0]; -- let _8 = t[0][0] * t[1][2]; -- let _9 = t[0][2] * t[1][0]; -- let _10 = t[0][0] * t[1][1]; -- let _11 = t[0][1] * t[1][0]; -- -- let d20 = _0*t[3][1] + _3*t[3][2] + _4*t[3][3]- -- (_1*t[3][1] + _2*t[3][2] + _5*t[3][3]); -- let d21 = _1*t[3][0] + _6*t[3][2] + _9*t[3][3]- -- (_0*t[3][0] + _7*t[3][2] + _8*t[3][3]); -- let d22 = _2*t[3][0] + _7*t[3][1] + _10*t[3][3]- -- (_3*t[3][0] + _6*t[3][1] + _11*t[3][3]); -- let d23 = _5*t[3][0] + _8*t[3][1] + _11*t[3][2]- -- (_4*t[3][0] + _9*t[3][1] + _10*t[3][2]); -- let d30 = _2*t[2][2] + _5*t[2][3] + _1*t[2][1]- -- (_4*t[2][3] + _0*t[2][1] + _3*t[2][2]); -- let d31 = _8*t[2][3] + _0*t[2][0] + _7*t[2][2]- -- (_6*t[2][2] + _9*t[2][3] + _1*t[2][0]); -- let d32 = _6*t[2][1] + _11*t[2][3] + _3*t[2][0]- -- (_10*t[2][3] + _2*t[2][0] + _7*t[2][1]); -- let d33 = _10*t[2][2] + _4*t[2][0] + _9*t[2][1]- -- (_8*t[2][1] + _11*t[2][2] + _5*t[2][0]); -- -- let det = t[0][0] * d00 + t[0][1] * d01 + t[0][2] * d02 + t[0][3] * d03; -- -- let det = 1.0 / det; -- let mut ret = [[d00, d01, d02, d03], -- [d10, d11, d12, d13], -- [d20, d21, d22, d23], -- [d30, d31, d32, d33]]; -- for i in 0..4 { -- for j in 0..4 { -- ret[i][j] *= det; -- } -- } -- bb(&ret); -- } -- }) --} -- 
--#[bench] --fn inverse_simd4(b: &mut B) { -- let mut x = [f32x4::splat(0_f32); 4]; -- for i in 0..4 { x[i] = x[i].replace(i as u32, 1.0); } -- -- fn shuf0145(v: f32x4, w: f32x4) -> f32x4 { -- f32x4::new(v.extract(0), v.extract(1), -- w.extract(4 - 4), w.extract(5 - 4)) -- } -- fn shuf0246(v: f32x4, w: f32x4) -> f32x4 { -- f32x4::new(v.extract(0), v.extract(2), -- w.extract(4 - 4), w.extract(6 - 4)) -- } -- fn shuf1357(v: f32x4, w: f32x4) -> f32x4 { -- f32x4::new(v.extract(1), v.extract(3), -- w.extract(5 - 4), w.extract(7 - 4)) -- } -- fn shuf2367(v: f32x4, w: f32x4) -> f32x4 { -- f32x4::new(v.extract(2), v.extract(3), -- w.extract(6 - 4), w.extract(7 - 4)) -- } -- -- fn swiz1032(v: f32x4) -> f32x4 { -- f32x4::new(v.extract(1), v.extract(0), -- v.extract(3), v.extract(2)) -- } -- fn swiz2301(v: f32x4) -> f32x4 { -- f32x4::new(v.extract(2), v.extract(3), -- v.extract(0), v.extract(1)) -- } -- -- b.iter(|| { -- for _ in 0..100 { -- let src0; -- let src1; -- let src2; -- let src3; -- let mut tmp1; -- let row0; -- let mut row1; -- let mut row2; -- let mut row3; -- let mut minor0; -- let mut minor1; -- let mut minor2; -- let mut minor3; -- let mut det; -- -- let x = bb(&x); -- src0 = x[0]; -- src1 = x[1]; -- src2 = x[2]; -- src3 = x[3]; -- -- tmp1 = shuf0145(src0, src1); -- row1 = shuf0145(src2, src3); -- row0 = shuf0246(tmp1, row1); -- row1 = shuf1357(row1, tmp1); -- -- tmp1 = shuf2367(src0, src1); -- row3 = shuf2367(src2, src3); -- row2 = shuf0246(tmp1, row3); -- row3 = shuf0246(row3, tmp1); -- -- -- tmp1 = row2 * row3; -- tmp1 = swiz1032(tmp1); -- minor0 = row1 * tmp1; -- minor1 = row0 * tmp1; -- tmp1 = swiz2301(tmp1); -- minor0 = (row1 * tmp1) - minor0; -- minor1 = (row0 * tmp1) - minor1; -- minor1 = swiz2301(minor1); -- -- -- tmp1 = row1 * row2; -- tmp1 = swiz1032(tmp1); -- minor0 = (row3 * tmp1) + minor0; -- minor3 = row0 * tmp1; -- tmp1 = swiz2301(tmp1); -- -- minor0 = minor0 - row3 * tmp1; -- minor3 = row0 * tmp1 - minor3; -- minor3 = swiz2301(minor3); -- -- -- 
tmp1 = row3 * swiz2301(row1); -- tmp1 = swiz1032(tmp1); -- row2 = swiz2301(row2); -- minor0 = row2 * tmp1 + minor0; -- minor2 = row0 * tmp1; -- tmp1 = swiz2301(tmp1); -- minor0 = minor0 - row2 * tmp1; -- minor2 = row0 * tmp1 - minor2; -- minor2 = swiz2301(minor2); -- -- -- tmp1 = row0 * row1; -- tmp1 = swiz1032(tmp1); -- minor2 = minor2 + row3 * tmp1; -- minor3 = row2 * tmp1 - minor3; -- tmp1 = swiz2301(tmp1); -- minor2 = row3 * tmp1 - minor2; -- minor3 = minor3 - row2 * tmp1; -- -- -- -- tmp1 = row0 * row3; -- tmp1 = swiz1032(tmp1); -- minor1 = minor1 - row2 * tmp1; -- minor2 = row1 * tmp1 + minor2; -- tmp1 = swiz2301(tmp1); -- minor1 = row2 * tmp1 + minor1; -- minor2 = minor2 - row1 * tmp1; -- -- tmp1 = row0 * row2; -- tmp1 = swiz1032(tmp1); -- minor1 = row3 * tmp1 + minor1; -- minor3 = minor3 - row1 * tmp1; -- tmp1 = swiz2301(tmp1); -- minor1 = minor1 - row3 * tmp1; -- minor3 = row1 * tmp1 + minor3; -- -- det = row0 * minor0; -- det = swiz2301(det) + det; -- det = swiz1032(det) + det; -- //tmp1 = det.approx_reciprocal(); det = tmp1 * (f32x4::splat(2.0) - det * tmp1); -- det = f32x4::splat(1.0) / det; -- -- bb(&[minor0 * det, minor1 * det, minor2 * det, minor3 * det]); -- } -- }) -- --} -- --#[bench] --fn transpose_naive(b: &mut B) { -- let x = [[0_f32; 4]; 4]; -- b.iter(|| { -- for _ in 0..100 { -- let x = bb(&x); -- bb(&[[x[0][0], x[1][0], x[2][0], x[3][0]], -- [x[0][1], x[1][1], x[2][1], x[3][1]], -- [x[0][2], x[1][2], x[2][2], x[3][2]], -- [x[0][3], x[1][3], x[2][3], x[3][3]]]); -- } -- }) --} -- --#[bench] --fn transpose_simd4(b: &mut B) { -- let x = [f32x4::splat(0_f32); 4]; -- -- fn shuf0246(v: f32x4, w: f32x4) -> f32x4 { -- f32x4::new(v.extract(0), v.extract(2), -- w.extract(4 - 4), w.extract(6 - 4)) -- } -- fn shuf1357(v: f32x4, w: f32x4) -> f32x4 { -- f32x4::new(v.extract(1), v.extract(3), -- w.extract(5 - 4), w.extract(7 - 4)) -- } -- b.iter(|| { -- for _ in 0..100 { -- let x = bb(&x); -- let x0 = x[0]; -- let x1 = x[1]; -- let x2 = x[2]; -- let x3 = 
x[3]; -- -- let a0 = shuf0246(x0, x1); -- let a1 = shuf0246(x2, x3); -- let a2 = shuf1357(x0, x1); -- let a3 = shuf1357(x2, x3); -- -- let b0 = shuf0246(a0, a1); -- let b1 = shuf0246(a2, a3); -- let b2 = shuf1357(a0, a1); -- let b3 = shuf1357(a2, a3); -- bb(&[b0, b1, b2, b3]); -- } -- }) --} -- --#[cfg(target_feature = "avx")] --#[bench] --fn transpose_simd8_naive(b: &mut B) { -- let x = [f32x8::splat(0_f32); 2]; -- -- fn shuf0246(v: f32x8, w: f32x8) -> f32x8 { -- f32x8::new(v.extract(0), v.extract(2), v.extract(4), v.extract(6), -- w.extract(0), w.extract(2), w.extract(4), w.extract(6)) -- } -- fn shuf1357(v: f32x8, w: f32x8) -> f32x8 { -- f32x8::new(v.extract(1), v.extract(3), v.extract(5), v.extract(7), -- w.extract(1), w.extract(3), w.extract(5), w.extract(7),) -- } -- b.iter(|| { -- for _ in 0..100 { -- let x = bb(&x); -- let x01 = x[0]; -- let x23 = x[1]; -- -- let a01 = shuf0246(x01, x23); -- let a23 = shuf1357(x01, x23); -- -- let b01 = shuf0246(a01, a23); -- let b23 = shuf1357(a01, a23); -- bb(&[b01, b23]); -- } -- }) --} -- --#[cfg(target_feature = "avx")] --#[bench] --fn transpose_simd8_avx2_vpermps(b: &mut B) { -- let x = [f32x8::splat(0_f32); 2]; -- -- // efficient on AVX2 using vpermps -- fn perm04152637(v: f32x8) -> f32x8 { -- // broken on rustc 1.7.0-nightly (1ddaf8bdf 2015-12-12) -- // v.permutevar(i32x8::new(0, 4, 1, 5, 2, 6, 3, 7)) -- f32x8::new(v.extract(0), v.extract(4), v.extract(1), v.extract(5), -- v.extract(2), v.extract(6), v.extract(3), v.extract(7)) -- } -- fn shuf_lo(v: f32x8, w: f32x8) -> f32x8 { -- f32x8::new(v.extract(0), v.extract(1), w.extract(0), w.extract(1), -- v.extract(4), v.extract(5), w.extract(4), w.extract(5),) -- } -- fn shuf_hi(v: f32x8, w: f32x8) -> f32x8 { -- f32x8::new(v.extract(2), v.extract(3), w.extract(2), w.extract(3), -- v.extract(6), v.extract(7), w.extract(6), w.extract(7),) -- } -- b.iter(|| { -- for _ in 0..100 { -- let x = bb(&x); -- let x01 = x[0]; -- let x23 = x[1]; -- -- let a01 = perm04152637(x01); -- 
let a23 = perm04152637(x23); -- -- let b01 = shuf_lo(a01, a23); -- let b23 = shuf_hi(a01, a23); -- bb(&[b01, b23]); -- } -- }) --} -- --#[cfg(target_feature = "avx")] --#[bench] --fn transpose_simd8_avx2_vpermpd(b: &mut B) { -- let x = [f32x8::splat(0_f32); 2]; -- -- // efficient on AVX2 using vpermpd -- fn perm01452367(v: f32x8) -> f32x8 { -- f32x8::new(v.extract(0), v.extract(1), v.extract(4), v.extract(5), -- v.extract(2), v.extract(3), v.extract(6), v.extract(7)) -- } -- fn shuf_lo_ps(v: f32x8, w: f32x8) -> f32x8 { -- f32x8::new(v.extract(0), w.extract(0), v.extract(1), w.extract(1), -- v.extract(4), w.extract(4), v.extract(5), w.extract(5),) -- } -- fn shuf_hi_ps(v: f32x8, w: f32x8) -> f32x8 { -- f32x8::new(v.extract(2), w.extract(2), v.extract(3), w.extract(3), -- v.extract(6), w.extract(6), v.extract(7), w.extract(7),) -- } -- b.iter(|| { -- for _ in 0..100 { -- let x = bb(&x); -- let x01 = x[0]; -- let x23 = x[1]; -- -- let a01 = perm01452367(x01); -- let a23 = perm01452367(x23); -- -- let b01 = shuf_lo_ps(a01, a23); -- let b23 = shuf_hi_ps(a01, a23); -- bb(&[b01, b23]); -- } -- }) --} -diff --git a/third_party/rust/simd/build.rs b/third_party/rust/simd/build.rs -deleted file mode 100644 -index 61b5330a1846..000000000000 ---- a/third_party/rust/simd/build.rs -+++ /dev/null -@@ -1,3 +0,0 @@ --fn main() { -- println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1"); --} -diff --git a/third_party/rust/simd/examples/axpy.rs b/third_party/rust/simd/examples/axpy.rs -deleted file mode 100755 -index 7862721b254d..000000000000 ---- a/third_party/rust/simd/examples/axpy.rs -+++ /dev/null -@@ -1,65 +0,0 @@ --#![feature(cfg_target_feature)] --extern crate simd; --use simd::f32x4; --#[cfg(target_feature = "avx")] --use simd::x86::avx::f32x8; -- --#[inline(never)] --pub fn axpy(z: &mut [f32], a: f32, x: &[f32], y: &[f32]) { -- assert_eq!(x.len(), y.len()); -- assert_eq!(x.len(), z.len()); -- -- let len = std::cmp::min(std::cmp::min(x.len(), y.len()), z.len()); -- -- let mut i = 0; 
-- while i < len & !3 { -- let x = f32x4::load(x, i); -- let y = f32x4::load(y, i); -- (f32x4::splat(a) * x + y).store(z, i); -- i += 4 -- } --} -- --#[cfg(target_feature = "avx")] --#[inline(never)] --pub fn axpy8(z: &mut [f32], a: f32, x: &[f32], y: &[f32]) { -- assert_eq!(x.len(), y.len()); -- assert_eq!(x.len(), z.len()); -- -- let len = std::cmp::min(std::cmp::min(x.len(), y.len()), z.len()); -- -- let mut i = 0; -- while i < len & !7 { -- let x = f32x8::load(x, i); -- let y = f32x8::load(y, i); -- (f32x8::splat(a) * x + y).store(z, i); -- i += 8 -- } --} -- -- --#[cfg(not(target_feature = "avx"))] --pub fn axpy8(_: &mut [f32], _: f32, _: &[f32], _: &[f32]) { -- unimplemented!() --} -- -- --fn main() { -- let mut z = vec![0.; 4]; -- axpy(&mut z, 2., &[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0]); -- println!("{:?}", z); -- let mut z = vec![0.; 8]; -- axpy(&mut z, 3., &[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], -- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0]); -- println!("{:?}", z); -- -- if cfg!(target_feature = "avx") { -- let mut z = vec![0.; 4]; -- axpy8(&mut z, 2., &[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0]); -- println!("{:?}", z); -- let mut z = vec![0.; 8]; -- axpy8(&mut z, 3., &[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], -- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0]); -- println!("{:?}", z); -- } --} -diff --git a/third_party/rust/simd/examples/convert.rs b/third_party/rust/simd/examples/convert.rs -deleted file mode 100644 -index 11823a4b50d2..000000000000 ---- a/third_party/rust/simd/examples/convert.rs -+++ /dev/null -@@ -1,38 +0,0 @@ --extern crate simd; --use simd::f32x4; -- --#[inline(never)] --pub fn convert_scalar(x: &mut [i32], y: &[f32]) { -- assert_eq!(x.len(), y.len()); -- -- let mut i = 0; -- while i < x.len() & !3 { -- x[i] = y[i] as i32; -- i += 1; -- } --} -- --#[inline(never)] --pub fn convert(x: &mut [i32], y: &[f32]) { -- assert_eq!(x.len(), y.len()); -- -- let mut i = 0; -- while i < x.len() & !3 { -- let v = f32x4::load(y, i); -- 
v.to_i32().store(x, i); -- i += 4 -- } --} -- --fn main() { -- let x = &mut [0; 12]; -- let y = [1.0; 12]; -- convert(x, &y); -- convert_scalar(x, &y); -- println!("{:?}", x); -- let x = &mut [0; 16]; -- let y = [1.0; 16]; -- convert(x, &y); -- convert_scalar(x, &y); -- println!("{:?}", x); --} -diff --git a/third_party/rust/simd/examples/dot-product.rs b/third_party/rust/simd/examples/dot-product.rs -deleted file mode 100755 -index 9f0e1d35c799..000000000000 ---- a/third_party/rust/simd/examples/dot-product.rs -+++ /dev/null -@@ -1,60 +0,0 @@ --#![feature(cfg_target_feature)] --extern crate simd; --use simd::f32x4; --#[cfg(target_feature = "avx")] --use simd::x86::avx::{f32x8, LowHigh128}; -- --#[inline(never)] --pub fn dot(x: &[f32], y: &[f32]) -> f32 { -- assert_eq!(x.len(), y.len()); -- -- let len = std::cmp::min(x.len(), y.len()); -- -- let mut sum = f32x4::splat(0.0); -- let mut i = 0; -- while i < len & !3 { -- let x = f32x4::load(x, i); -- let y = f32x4::load(y, i); -- sum = sum + x * y; -- i += 4 -- } -- sum.extract(0) + sum.extract(1) + sum.extract(2) + sum.extract(3) --} -- --#[cfg(target_feature = "avx")] --#[inline(never)] --pub fn dot8(x: &[f32], y: &[f32]) -> f32 { -- assert_eq!(x.len(), y.len()); -- -- let len = std::cmp::min(x.len(), y.len()); -- -- let mut sum = f32x8::splat(0.0); -- let mut i = 0; -- while i < len & !7 { -- let x = f32x8::load(x, i); -- let y = f32x8::load(y, i); -- sum = sum + x * y; -- i += 8 -- } -- let sum = sum.low() + sum.high(); -- sum.extract(0) + sum.extract(1) + sum.extract(2) + sum.extract(3) --} -- -- --#[cfg(not(target_feature = "avx"))] --pub fn dot8(_: &[f32], _: &[f32]) -> f32 { -- unimplemented!() --} -- -- --fn main() { -- println!("{}", dot(&[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0])); -- println!("{}", dot(&[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], -- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0])); -- -- if cfg!(target_feature = "avx") { -- println!("{}", dot8(&[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0])); -- 
println!("{}", dot8(&[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], -- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0])); -- } --} -diff --git a/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs b/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs -deleted file mode 100644 -index fa30b2283f93..000000000000 ---- a/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs -+++ /dev/null -@@ -1,156 +0,0 @@ --// The Computer Language Benchmarks Game --// http://benchmarksgame.alioth.debian.org/ --// --// contributed by the Rust Project Developers --// contributed by TeXitoi -- --use std::{cmp, mem}; --use std::thread; -- --fn rotate(x: &mut [i32]) { -- let mut prev = x[0]; -- for place in x.iter_mut().rev() { -- prev = mem::replace(place, prev) -- } --} -- --fn next_permutation(perm: &mut [i32], count: &mut [i32]) { -- for i in 1..perm.len() { -- rotate(&mut perm[.. i + 1]); -- let count_i = &mut count[i]; -- if *count_i >= i as i32 { -- *count_i = 0; -- } else { -- *count_i += 1; -- break -- } -- } --} -- --#[derive(Clone, Copy)] --struct P { -- p: [i32; 16], --} -- --#[derive(Clone, Copy)] --struct Perm { -- cnt: [i32; 16], -- fact: [u32; 16], -- n: u32, -- permcount: u32, -- perm: P, --} -- --impl Perm { -- fn new(n: u32) -> Perm { -- let mut fact = [1; 16]; -- for i in 1 .. n as usize + 1 { -- fact[i] = fact[i - 1] * i as u32; -- } -- Perm { -- cnt: [0; 16], -- fact: fact, -- n: n, -- permcount: 0, -- perm: P { p: [0; 16 ] } -- } -- } -- -- fn get(&mut self, mut idx: i32) -> P { -- let mut pp = [0u8; 16]; -- self.permcount = idx as u32; -- for (i, place) in self.perm.p.iter_mut().enumerate() { -- *place = i as i32 + 1; -- } -- -- for i in (1 .. self.n as usize).rev() { -- let d = idx / self.fact[i] as i32; -- self.cnt[i] = d; -- idx %= self.fact[i] as i32; -- for (place, val) in pp.iter_mut().zip(self.perm.p[..(i+1)].iter()) { -- *place = (*val) as u8 -- } -- -- let d = d as usize; -- for j in 0 .. 
i + 1 { -- self.perm.p[j] = if j + d <= i {pp[j + d]} else {pp[j+d-i-1]} as i32; -- } -- } -- -- self.perm -- } -- -- fn count(&self) -> u32 { self.permcount } -- fn max(&self) -> u32 { self.fact[self.n as usize] } -- -- fn next(&mut self) -> P { -- next_permutation(&mut self.perm.p, &mut self.cnt); -- self.permcount += 1; -- -- self.perm -- } --} -- -- --fn reverse(tperm: &mut [i32], k: usize) { -- tperm[..k].reverse() --} -- --fn work(mut perm: Perm, n: usize, max: usize) -> (i32, i32) { -- let mut checksum = 0; -- let mut maxflips = 0; -- -- let mut p = perm.get(n as i32); -- -- while perm.count() < max as u32 { -- let mut flips = 0; -- -- while p.p[0] != 1 { -- let k = p.p[0] as usize; -- reverse(&mut p.p, k); -- flips += 1; -- } -- -- checksum += if perm.count() % 2 == 0 {flips} else {-flips}; -- maxflips = cmp::max(maxflips, flips); -- -- p = perm.next(); -- } -- -- (checksum, maxflips) --} -- --fn fannkuch(n: i32) -> (i32, i32) { -- let perm = Perm::new(n as u32); -- -- let n = 1; -- let mut futures = vec![]; -- let k = perm.max() / n; -- -- for j in (0..).map(|x| x * k).take_while(|&j| j < k * n) { -- let max = cmp::min(j+k, perm.max()); -- -- futures.push(thread::spawn(move|| { -- work(perm, j as usize, max as usize) -- })) -- } -- -- let mut checksum = 0; -- let mut maxflips = 0; -- for fut in futures.into_iter() { -- let (cs, mf) = fut.join().unwrap(); -- checksum += cs; -- maxflips = cmp::max(maxflips, mf); -- } -- (checksum, maxflips) --} -- --fn main() { -- let n = std::env::args_os().nth(1) -- .and_then(|s| s.into_string().ok()) -- .and_then(|n| n.parse().ok()) -- .unwrap_or(7); -- -- let (checksum, maxflips) = fannkuch(n); -- println!("{}\nPfannkuchen({}) = {}", checksum, n, maxflips); --} -diff --git a/third_party/rust/simd/examples/fannkuch-redux.rs b/third_party/rust/simd/examples/fannkuch-redux.rs -deleted file mode 100755 -index 2e52ae721135..000000000000 ---- a/third_party/rust/simd/examples/fannkuch-redux.rs -+++ /dev/null -@@ -1,233 +0,0 @@ 
--#![feature(cfg_target_feature)] --extern crate simd; --#[macro_use] extern crate cfg_if; --use simd::u8x16; -- --use std::{env, process}; -- --cfg_if! { -- if #[cfg(target_arch = "aarch64")] { -- #[inline(always)] -- fn shuffle(x: u8x16, y: u8x16) -> u8x16 { -- use simd::aarch64::neon::*; -- y.table_lookup_1(x) -- } -- } else if #[cfg(all(target_arch = "arm", -- target_feature = "neon"))] { -- #[inline(always)] -- fn shuffle(x: u8x16, y: u8x16) -> u8x16 { -- use simd::arm::neon::*; -- #[inline(always)] -- fn split(x: u8x16) -> (u8x8, u8x8) { -- unsafe {std::mem::transmute(x)} -- } -- fn join(x: u8x8, y: u8x8) -> u8x16 { -- unsafe {std::mem::transmute((x, y))} -- } -- -- let (t0, t1) = split(x); -- let (i0, i1) = split(y); -- join(i0.table_lookup_2(t0, t1), -- i1.table_lookup_2(t0, t1)) -- } -- } else if #[cfg(target_feature = "ssse3")] { -- #[inline(always)] -- fn shuffle(x: u8x16, y: u8x16) -> u8x16 { -- use simd::x86::ssse3::*; -- x.shuffle_bytes(y) -- } -- } else { -- // slow fallback, so tests work -- #[inline(always)] -- fn shuffle(x: u8x16, y: u8x16) -> u8x16 { -- u8x16::new(x.extract(y.extract(0) as u32), -- x.extract(y.extract(1) as u32), -- x.extract(y.extract(2) as u32), -- x.extract(y.extract(3) as u32), -- x.extract(y.extract(4) as u32), -- x.extract(y.extract(5) as u32), -- x.extract(y.extract(6) as u32), -- x.extract(y.extract(7) as u32), -- x.extract(y.extract(8) as u32), -- x.extract(y.extract(9) as u32), -- x.extract(y.extract(10) as u32), -- x.extract(y.extract(11) as u32), -- x.extract(y.extract(12) as u32), -- x.extract(y.extract(13) as u32), -- x.extract(y.extract(14) as u32), -- x.extract(y.extract(15) as u32)) -- } -- } --} --struct State { -- s: [u8; 16], -- flip_masks: [u8x16; 16], -- rotate_masks: [u8x16; 16], -- -- maxflips: i32, -- odd: u16, -- checksum: i32, --} --impl State { -- fn new() -> State { -- State { -- s: [0; 16], -- flip_masks: [u8x16::splat(0); 16], -- rotate_masks: [u8x16::splat(0); 16], -- -- maxflips: 0, -- odd: 0, -- 
checksum: 0, -- } -- } -- #[inline(never)] -- fn rotate_sisd(&mut self, n: usize) { -- let c = self.s[0]; -- for i in 1..(n + 1) { -- self.s[i - 1] = self.s[i]; -- } -- self.s[n] = c; -- } -- #[inline(never)] -- fn popmasks(&mut self) { -- let mut mask = [0_u8; 16]; -- for i in 0..16 { -- for j in 0..16 { mask[j] = j as u8; } -- -- for x in 0..(i+1)/2 { -- mask.swap(x, i - x); -- } -- -- self.flip_masks[i] = u8x16::load(&mask, 0); -- -- for j in 0..16 { self.s[j] = j as u8; } -- self.rotate_sisd(i); -- self.rotate_masks[i] = self.load_s(); -- } -- } -- fn rotate(&mut self, n: usize) { -- shuffle(self.load_s(), self.rotate_masks[n]).store(&mut self.s, 0) -- } -- -- fn load_s(&self) -> u8x16 { -- u8x16::load(&self.s, 0) -- } -- -- -- #[inline(never)] -- fn tk(&mut self, n: usize) { -- #[derive(Copy, Clone, Debug)] -- struct Perm { -- perm: u8x16, -- start: u8, -- odd: u16 -- } -- -- let mut perms = [Perm { perm: u8x16::splat(0), start: 0 , odd: 0 }; 60]; -- -- let mut i = 0; -- let mut c = [0_u8; 16]; -- let mut perm_max = 0; -- -- while i < n { -- while i < n && perm_max < 60 { -- self.rotate(i); -- if c[i] as usize >= i { -- c[i] = 0; -- i += 1; -- continue -- } -- -- c[i] += 1; -- i = 1; -- self.odd = !self.odd; -- if self.s[0] != 0 { -- if self.s[self.s[0] as usize] != 0 { -- perms[perm_max].perm = self.load_s(); -- perms[perm_max].start = self.s[0]; -- perms[perm_max].odd = self.odd; -- perm_max += 1; -- } else { -- if self.maxflips == 0 { self.maxflips = 1 } -- self.checksum += if self.odd != 0 { -1 } else { 1 }; -- } -- } -- } -- -- let mut k = 0; -- while k < std::cmp::max(1, perm_max) - 1 { -- let pk = &perms[k]; -- let pk1 = &perms[k + 1]; -- //println!("perm1 {:?}\nperm2 {:?}", pk.perm, pk1.perm); -- let mut perm1 = pk.perm; -- let mut perm2 = pk1.perm; -- -- let mut f1 = 0; -- let mut f2 = 0; -- let mut toterm1 = pk.start; -- let mut toterm2 = pk1.start; -- -- while toterm1 != 0 && toterm2 != 0 { -- perm1 = shuffle(perm1, self.flip_masks[toterm1 as 
usize]); -- perm2 = shuffle(perm2, self.flip_masks[toterm2 as usize]); -- toterm1 = perm1.extract(0); -- toterm2 = perm2.extract(0); -- -- f1 += 1; f2 += 1; -- } -- while toterm1 != 0 { -- perm1 = shuffle(perm1, self.flip_masks[toterm1 as usize]); -- toterm1 = perm1.extract(0); -- f1 += 1; -- } -- while toterm2 != 0 { -- perm2 = shuffle(perm2, self.flip_masks[toterm2 as usize]); -- toterm2 = perm2.extract(0); -- f2 += 1; -- } -- -- if f1 > self.maxflips { self.maxflips = f1 } -- if f2 > self.maxflips { self.maxflips = f2 } -- self.checksum += if pk.odd != 0 { -f1 } else { f1 }; -- self.checksum += if pk1.odd != 0 { -f2 } else { f2 }; -- -- k += 2; -- } -- while k < perm_max { -- let pk = &perms[k]; -- let mut perm = pk.perm; -- let mut f = 0; -- let mut toterm = pk.start; -- while toterm != 0 { -- perm = shuffle(perm, self.flip_masks[toterm as usize]); -- toterm = perm.extract(0); -- f += 1; -- } -- if f > self.maxflips { self.maxflips = f } -- self.checksum += if pk.odd != 0 { -f } else { f }; -- k += 1 -- } -- perm_max = 0; -- } -- } --} -- --fn main() { -- let mut state = State::new(); -- state.popmasks(); -- -- let args = env::args().collect::>(); -- if args.len() < 2 { -- println!("usage: {} number", args[0]); -- process::exit(1) -- } -- let max_n = args[1].parse().unwrap(); -- if max_n < 3 || max_n > 15 { -- println!("range: must be 3 <= n <= 14"); -- process::exit(1); -- } -- for i in 0..max_n { state.s[i] = i as u8 } -- state.tk(max_n); -- -- println!("{}\nPfannkuchen({}) = {}", state.checksum, max_n, state.maxflips); --} -diff --git a/third_party/rust/simd/examples/mandelbrot.rs b/third_party/rust/simd/examples/mandelbrot.rs -deleted file mode 100755 -index c6f1320a0784..000000000000 ---- a/third_party/rust/simd/examples/mandelbrot.rs -+++ /dev/null -@@ -1,125 +0,0 @@ --#![feature(iterator_step_by, test)] -- --extern crate test; --extern crate simd; --use simd::{f32x4, u32x4}; --use std::io::prelude::*; -- --#[inline(never)] --fn mandelbrot_naive(c_x: f32, 
c_y: f32, max_iter: u32) -> u32 { -- let mut x = c_x; -- let mut y = c_y; -- let mut count = 0; -- while count < max_iter { -- let xy = x * y; -- let xx = x * x; -- let yy = y * y; -- let sum = xx + yy; -- if sum > 4.0 { -- break -- } -- count += 1; -- x = xx - yy + c_x; -- y = xy * 2.0 + c_y; -- } -- count --} -- --#[inline(never)] --fn mandelbrot_vector(c_x: f32x4, c_y: f32x4, max_iter: u32) -> u32x4 { -- let mut x = c_x; -- let mut y = c_y; -- -- let mut count = u32x4::splat(0); -- for _ in 0..max_iter as usize { -- let xy = x * y; -- let xx = x * x; -- let yy = y * y; -- let sum = xx + yy; -- let mask = sum.lt(f32x4::splat(4.0)); -- -- if !mask.any() { break } -- count = count + mask.to_i().select(u32x4::splat(1), -- u32x4::splat(0)); -- -- x = xx - yy + c_x; -- y = xy + xy + c_y; -- } -- count --} -- --const COLOURS: &'static [(f32, f32, f32)] = &[(0.0, 7.0, 100.0), -- (32.0, 107.0, 203.0), -- (237.0, 255.0, 255.0), -- (255.0, 170.0, 0.0), -- (0.0, 2.0, 0.0)]; --const SCALE: f32 = 12.0; --const LIMIT: u32 = 100; -- --#[inline(never)] --fn output_one(buf: &mut [u8], val: u32) { -- let (r, g, b); -- if val == LIMIT { -- r = 0; -- g = 0; -- b = 0; -- } else { -- let val = (val as f32 % SCALE) * (COLOURS.len() as f32) / SCALE; -- let left = val as usize % COLOURS.len(); -- let right = (left + 1) % COLOURS.len(); -- -- let p = val - left as f32; -- let (r1, g1, b1) = COLOURS[left]; -- let (r2, g2, b2) = COLOURS[right]; -- r = (r1 + (r2 - r1) * p) as u8; -- g = (g1 + (g2 - g1) * p) as u8; -- b = (b1 + (b2 - b1) * p) as u8; -- } -- buf[0] = r; -- buf[1] = g; -- buf[2] = b; --} -- --fn main() { -- let mut args = std::env::args(); -- args.next(); -- let width = args.next().unwrap().parse().unwrap(); -- let height = args.next().unwrap().parse().unwrap(); -- -- let left = -2.2; -- let right = left + 3.0; -- let top = 1.0; -- let bottom = top - 2.0; -- -- let width_step: f32 = (right - left) / width as f32; -- let height_step: f32 = (bottom - top) / height as f32; -- -- 
let adjust = f32x4::splat(width_step) * f32x4::new(0., 1., 2., 3.); -- -- println!("P6 {} {} 255", width, height); -- let mut line = vec![0; width * 3]; -- -- if args.next().is_none() { -- for i in 0..height { -- let y = f32x4::splat(top + height_step * i as f32); -- for j in (0..width).step_by(4) { -- let x = f32x4::splat(left + width_step * j as f32) + adjust; -- let ret = mandelbrot_vector(x, y, LIMIT); -- test::black_box(ret); -- for k in 0..4 { let val = ret.extract(k as u32); output_one(&mut line[3*(j + k)..3*(j + k + 1)], val); } -- } -- ::std::io::stdout().write(&line).unwrap(); -- } -- } else { -- for i in 0..height { -- let y = top + height_step * i as f32; -- for j in 0..width { -- let x = left + width_step * j as f32; -- let val = mandelbrot_naive(x, y, LIMIT); -- test::black_box(val); -- output_one(&mut line[3*j..3*(j + 1)], val); -- } -- ::std::io::stdout().write(&line).unwrap(); -- } -- } --} -diff --git a/third_party/rust/simd/examples/matrix-inverse.rs b/third_party/rust/simd/examples/matrix-inverse.rs -deleted file mode 100644 -index e6eb7ffc4655..000000000000 ---- a/third_party/rust/simd/examples/matrix-inverse.rs -+++ /dev/null -@@ -1,281 +0,0 @@ --extern crate simd; --use simd::f32x4; -- --fn mul(x: &[f32x4; 4], y: &[f32x4; 4]) -> [f32x4; 4] { -- let y0 = y[0]; -- let y1 = y[1]; -- let y2 = y[2]; -- let y3 = y[3]; -- [f32x4::splat(y0.extract(0)) * x[0] + -- f32x4::splat(y0.extract(1)) * x[1] + -- f32x4::splat(y0.extract(2)) * x[2] + -- f32x4::splat(y0.extract(3)) * x[3], -- f32x4::splat(y1.extract(0)) * x[0] + -- f32x4::splat(y1.extract(1)) * x[1] + -- f32x4::splat(y1.extract(2)) * x[2] + -- f32x4::splat(y1.extract(3)) * x[3], -- f32x4::splat(y2.extract(0)) * x[0] + -- f32x4::splat(y2.extract(1)) * x[1] + -- f32x4::splat(y2.extract(2)) * x[2] + -- f32x4::splat(y2.extract(3)) * x[3], -- f32x4::splat(y3.extract(0)) * x[0] + -- f32x4::splat(y3.extract(1)) * x[1] + -- f32x4::splat(y3.extract(2)) * x[2] + -- f32x4::splat(y3.extract(3)) * x[3], -- ] 
--} -- --#[allow(dead_code)] --fn inverse_naive(x: &[[f32; 4]; 4]) -> [[f32; 4]; 4] { -- let mut t = [[0_f32; 4]; 4]; -- for i in 0..4 { -- t[0][i] = x[i][0]; -- t[1][i] = x[i][1]; -- t[2][i] = x[i][2]; -- t[3][i] = x[i][3]; -- } -- println!("{:?}", t); -- -- let _0 = t[2][2] * t[3][3]; -- let _1 = t[2][3] * t[3][2]; -- let _2 = t[2][1] * t[3][3]; -- let _3 = t[2][3] * t[3][1]; -- let _4 = t[2][1] * t[3][2]; -- let _5 = t[2][2] * t[3][1]; -- let _6 = t[2][0] * t[3][3]; -- let _7 = t[2][3] * t[3][0]; -- let _8 = t[2][0] * t[3][2]; -- let _9 = t[2][2] * t[3][0]; -- let _10 = t[2][0] * t[3][1]; -- let _11 = t[2][1] * t[3][0]; -- let v = [_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11]; -- println!("{:?}", v); -- -- let d00 = _0 * t[1][1] + _3 * t[1][2] + _4 * t[1][3] - -- (_1 * t[1][1] + _2 * t[1][2] + _5 * t[1][3]); -- let d01 = _1 * t[1][0] + _6 * t[1][2] + _9 * t[1][3] - -- (_0 * t[1][0] + _7 * t[1][2] + _8 * t[1][3]); -- let d02 = _2 * t[1][0] + _7 * t[1][1] + _10 * t[1][3] - -- (_3 * t[1][0] + _6 * t[1][1] + _11 * t[1][3]); -- let d03 = _5 * t[1][0] + _8 * t[1][1] + _11 * t[1][2] - -- (_4 * t[1][0] + _9 * t[1][1] + _10 * t[1][2]); -- let d10 = _1 * t[0][1] + _2 * t[0][2] + _5 * t[0][3] - -- (_0 * t[0][1] + _3 * t[0][2] + _4 * t[0][3]); -- let d11 = _0 * t[0][0] + _7 * t[0][2] + _8 * t[0][3] - -- (_1 * t[0][0] + _6 * t[0][2] + _9 * t[0][3]); -- let d12 = _3 * t[0][0] + _6 * t[0][1] + _11 * t[0][3] - -- (_2 * t[0][0] + _7 * t[0][1] + _10 * t[0][3]); -- let d13 = _4 * t[0][0] + _9 * t[0][1] + _10 * t[0][2] - -- (_5 * t[0][0] + _8 * t[0][1] + _11 * t[0][2]); -- -- println!("{:?}", [d00, d01, d02, d03, d10, d11, d12, d13]); -- -- let _0 = t[0][2] * t[1][3]; -- let _1 = t[0][3] * t[1][2]; -- let _2 = t[0][1] * t[1][3]; -- let _3 = t[0][3] * t[1][1]; -- let _4 = t[0][1] * t[1][2]; -- let _5 = t[0][2] * t[1][1]; -- let _6 = t[0][0] * t[1][3]; -- let _7 = t[0][3] * t[1][0]; -- let _8 = t[0][0] * t[1][2]; -- let _9 = t[0][2] * t[1][0]; -- let _10 = t[0][0] * t[1][1]; -- 
let _11 = t[0][1] * t[1][0]; -- let v = [_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11]; -- println!("{:?}", v); -- -- let d20 = _0*t[3][1] + _3*t[3][2] + _4*t[3][3]- -- (_1*t[3][1] + _2*t[3][2] + _5*t[3][3]); -- let d21 = _1*t[3][0] + _6*t[3][2] + _9*t[3][3]- -- (_0*t[3][0] + _7*t[3][2] + _8*t[3][3]); -- let d22 = _2*t[3][0] + _7*t[3][1] + _10*t[3][3]- -- (_3*t[3][0] + _6*t[3][1] + _11*t[3][3]); -- let d23 = _5*t[3][0] + _8*t[3][1] + _11*t[3][2]- -- (_4*t[3][0] + _9*t[3][1] + _10*t[3][2]); -- let d30 = _2*t[2][2] + _5*t[2][3] + _1*t[2][1]- -- (_4*t[2][3] + _0*t[2][1] + _3*t[2][2]); -- let d31 = _8*t[2][3] + _0*t[2][0] + _7*t[2][2]- -- (_6*t[2][2] + _9*t[2][3] + _1*t[2][0]); -- let d32 = _6*t[2][1] + _11*t[2][3] + _3*t[2][0]- -- (_10*t[2][3] + _2*t[2][0] + _7*t[2][1]); -- let d33 = _10*t[2][2] + _4*t[2][0] + _9*t[2][1]- -- (_8*t[2][1] + _11*t[2][2] + _5*t[2][0]); -- -- println!("{:?}", [d20, d21, d22, d23, d30, d31, d32, d33]); -- -- let det = t[0][0] * d00 + t[0][1] * d01 + t[0][2] * d02 + t[0][3] * d03; -- -- let det = 1.0 / det; -- let mut ret = [[d00, d01, d02, d03], -- [d10, d11, d12, d13], -- [d20, d21, d22, d23], -- [d30, d31, d32, d33]]; -- for i in 0..4 { -- for j in 0..4 { -- ret[i][j] *= det; -- } -- } -- ret --} -- --fn inverse_simd4(x: &[f32x4; 4]) -> [f32x4; 4] { -- let src0 = x[0]; -- let src1 = x[1]; -- let src2 = x[2]; -- let src3 = x[3]; -- -- let tmp1 = f32x4::new(src0.extract(0), src0.extract(1), -- src1.extract(4 - 4), src1.extract(5 - 4)); -- let row1 = f32x4::new(src2.extract(0), src2.extract(1), -- src3.extract(4 - 4), src3.extract(5 - 4)); -- let row0 = f32x4::new(tmp1.extract(0), tmp1.extract(2), -- row1.extract(4 - 4), row1.extract(6 - 4)); -- let row1 = f32x4::new(row1.extract(1), row1.extract(3), -- tmp1.extract(5 - 4), tmp1.extract(7 - 4)); -- -- let tmp1 = f32x4::new(src0.extract(2), src0.extract(3), -- src1.extract(6 - 4), src1.extract(7 - 4)); -- let row3 = f32x4::new(src2.extract(2), src2.extract(3), -- src3.extract(6 - 4), 
src3.extract(7 - 4)); -- let row2 = f32x4::new(tmp1.extract(0), tmp1.extract(2), -- row3.extract(4 - 4), row3.extract(6 - 4)); -- let row3 = f32x4::new(row3.extract(1), row3.extract(3), -- tmp1.extract(5 - 4), tmp1.extract(7 - 4)); -- -- -- let tmp1 = row2 * row3; -- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), -- tmp1.extract(3), tmp1.extract(2)); -- let minor0 = row1 * tmp1; -- let minor1 = row0 * tmp1; -- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), -- tmp1.extract(0), tmp1.extract(1)); -- let minor0 = (row1 * tmp1) - minor0; -- let minor1 = (row0 * tmp1) - minor1; -- let minor1 = f32x4::new(minor1.extract(2), minor1.extract(3), -- minor1.extract(0), minor1.extract(1)); -- //println!("{:?}", minor1); -- -- -- let tmp1 = row1 * row2; -- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), -- tmp1.extract(3), tmp1.extract(2)); -- let minor0 = (row3 * tmp1) + minor0; -- let minor3 = row0 * tmp1; -- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), -- tmp1.extract(0), tmp1.extract(1)); -- -- let minor0 = minor0 - row3 * tmp1; -- let minor3 = row0 * tmp1 - minor3; -- let minor3 = f32x4::new(minor3.extract(2), minor3.extract(3), -- minor3.extract(0), minor3.extract(1)); -- //println!("{:?}", minor1); -- -- -- let tmp1 = row3 * f32x4::new(row1.extract(2), row1.extract(3), -- row1.extract(0), row1.extract(1)); -- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), -- tmp1.extract(3), tmp1.extract(2)); -- let row2 = f32x4::new(row2.extract(2), row2.extract(3), -- row2.extract(0), row2.extract(1)); -- let minor0 = row2 * tmp1 + minor0; -- let minor2 = row0 * tmp1; -- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), -- tmp1.extract(0), tmp1.extract(1)); -- let minor0 = minor0 - row2 * tmp1; -- let minor2 = row0 * tmp1 - minor2; -- let minor2 = f32x4::new(minor2.extract(2), minor2.extract(3), -- minor2.extract(0), minor2.extract(1)); -- //println!("{:?}", minor1); -- -- -- let tmp1 = row0 * row1; -- let tmp1 = 
f32x4::new(tmp1.extract(1), tmp1.extract(0), -- tmp1.extract(3), tmp1.extract(2)); -- let minor2 = minor2 + row3 * tmp1; -- let minor3 = row2 * tmp1 - minor3; -- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), -- tmp1.extract(0), tmp1.extract(1)); -- let minor2 = row3 * tmp1 - minor2; -- let minor3 = minor3 - row2 * tmp1; -- //println!("{:?}", minor1); -- -- -- -- let tmp1 = row0 * row3; -- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), -- tmp1.extract(3), tmp1.extract(2)); -- let minor1 = minor1 - row2 * tmp1; -- let minor2 = row1 * tmp1 + minor2; -- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), -- tmp1.extract(0), tmp1.extract(1)); -- let minor1 = row2 * tmp1 + minor1; -- let minor2 = minor2 - row1 * tmp1; -- //println!("{:?}", minor1); -- -- let tmp1 = row0 * row2; -- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), -- tmp1.extract(3), tmp1.extract(2)); -- let minor1 = row3 * tmp1 + minor1; -- let minor3 = minor3 - row1 * tmp1; -- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), -- tmp1.extract(0), tmp1.extract(1)); -- let minor1 = minor1 - row3 * tmp1; -- let minor3 = row1 * tmp1 + minor3; -- //println!("{:?}", minor1); -- -- let det = row0 * minor0; -- let det = f32x4::new(det.extract(2), det.extract(3), -- det.extract(0), det.extract(1)) + det; -- let det = f32x4::new(det.extract(1), det.extract(0), -- det.extract(3), det.extract(2)) + det; -- let tmp1 = det.approx_reciprocal(); -- let det = tmp1 + tmp1 - det * tmp1 * tmp1; -- --// let det = f32x4::splat(det.extract(0)); -- -- [minor0 * det, minor1 * det, minor2 * det, minor3 * det] --} -- --fn p(x: &[f32x4; 4]) { -- for xx in x { -- for i in 0..4 { -- let v = xx.extract(i); -- if v == 0.0 { -- print!("{}{:6.2}", if i > 0 {", "} else {"|"}, ""); -- } else { -- print!("{}{:6.2}", if i > 0 {", "} else {"|"}, xx.extract(i)); -- } -- } -- println!(" |"); -- } --} -- --fn main() { -- let x = [f32x4::new(-100.0, 6.0, 100.0, 1.0), -- f32x4::new(3.0, 1.0, 0.0, 1.0), -- 
f32x4::new(2.0, 1.0, 1.0, 1.0), -- f32x4::new(-10.0, 1.0, 1.0, 1.0)]; -- -- /* let mut x_ = [[0.0; 4]; 4]; -- for i in 0..4 { -- for j in 0..4 { -- x_[i][j] = x[i].extract(j as u32) -- } -- } -- -- let ret = inverse_naive(&x_); -- let mut y = [f32x4::splat(0.0); 4]; -- for i in 0..4 { -- for j in 0..4 { -- y[i] = y[i].replace(j as u32, ret[i][j]) -- } --}*/ -- let y = inverse_simd4(&x); -- p(&x); -- println!(""); -- p(&y); -- println!(""); -- p(&mul(&x, &y)) --} -diff --git a/third_party/rust/simd/examples/nbody-nosimd.rs b/third_party/rust/simd/examples/nbody-nosimd.rs -deleted file mode 100644 -index d5f1bb422ff2..000000000000 ---- a/third_party/rust/simd/examples/nbody-nosimd.rs -+++ /dev/null -@@ -1,156 +0,0 @@ --// The Computer Language Benchmarks Game --// http://benchmarksgame.alioth.debian.org/ --// --// contributed by the Rust Project Developers --// contributed by TeXitoi -- --const PI: f64 = 3.141592653589793; --const SOLAR_MASS: f64 = 4.0 * PI * PI; --const YEAR: f64 = 365.24; --const N_BODIES: usize = 5; -- --static BODIES: [Planet;N_BODIES] = [ -- // Sun -- Planet { -- x: 0.0, y: 0.0, z: 0.0, -- vx: 0.0, vy: 0.0, vz: 0.0, -- mass: SOLAR_MASS, -- }, -- // Jupiter -- Planet { -- x: 4.84143144246472090e+00, -- y: -1.16032004402742839e+00, -- z: -1.03622044471123109e-01, -- vx: 1.66007664274403694e-03 * YEAR, -- vy: 7.69901118419740425e-03 * YEAR, -- vz: -6.90460016972063023e-05 * YEAR, -- mass: 9.54791938424326609e-04 * SOLAR_MASS, -- }, -- // Saturn -- Planet { -- x: 8.34336671824457987e+00, -- y: 4.12479856412430479e+00, -- z: -4.03523417114321381e-01, -- vx: -2.76742510726862411e-03 * YEAR, -- vy: 4.99852801234917238e-03 * YEAR, -- vz: 2.30417297573763929e-05 * YEAR, -- mass: 2.85885980666130812e-04 * SOLAR_MASS, -- }, -- // Uranus -- Planet { -- x: 1.28943695621391310e+01, -- y: -1.51111514016986312e+01, -- z: -2.23307578892655734e-01, -- vx: 2.96460137564761618e-03 * YEAR, -- vy: 2.37847173959480950e-03 * YEAR, -- vz: -2.96589568540237556e-05 * 
YEAR, -- mass: 4.36624404335156298e-05 * SOLAR_MASS, -- }, -- // Neptune -- Planet { -- x: 1.53796971148509165e+01, -- y: -2.59193146099879641e+01, -- z: 1.79258772950371181e-01, -- vx: 2.68067772490389322e-03 * YEAR, -- vy: 1.62824170038242295e-03 * YEAR, -- vz: -9.51592254519715870e-05 * YEAR, -- mass: 5.15138902046611451e-05 * SOLAR_MASS, -- }, --]; -- --#[derive(Clone, Copy)] --struct Planet { -- x: f64, y: f64, z: f64, -- vx: f64, vy: f64, vz: f64, -- mass: f64, --} -- --fn advance(bodies: &mut [Planet;N_BODIES], dt: f64, steps: i32) { -- for _ in 0..steps { -- let mut b_slice: &mut [_] = bodies; -- loop { -- let bi = match shift_mut_ref(&mut b_slice) { -- Some(bi) => bi, -- None => break -- }; -- for bj in b_slice.iter_mut() { -- let dx = bi.x - bj.x; -- let dy = bi.y - bj.y; -- let dz = bi.z - bj.z; -- -- let d2 = dx * dx + dy * dy + dz * dz; -- let mag = dt / (d2 * d2.sqrt()); -- -- let massj_mag = bj.mass * mag; -- bi.vx -= dx * massj_mag; -- bi.vy -= dy * massj_mag; -- bi.vz -= dz * massj_mag; -- -- let massi_mag = bi.mass * mag; -- bj.vx += dx * massi_mag; -- bj.vy += dy * massi_mag; -- bj.vz += dz * massi_mag; -- } -- bi.x += dt * bi.vx; -- bi.y += dt * bi.vy; -- bi.z += dt * bi.vz; -- } -- } --} -- --fn energy(bodies: &[Planet;N_BODIES]) -> f64 { -- let mut e = 0.0; -- let mut bodies = bodies.iter(); -- loop { -- let bi = match bodies.next() { -- Some(bi) => bi, -- None => break -- }; -- e += (bi.vx * bi.vx + bi.vy * bi.vy + bi.vz * bi.vz) * bi.mass / 2.0; -- for bj in bodies.clone() { -- let dx = bi.x - bj.x; -- let dy = bi.y - bj.y; -- let dz = bi.z - bj.z; -- let dist = (dx * dx + dy * dy + dz * dz).sqrt(); -- e -= bi.mass * bj.mass / dist; -- } -- } -- e --} -- --fn offset_momentum(bodies: &mut [Planet;N_BODIES]) { -- let mut px = 0.0; -- let mut py = 0.0; -- let mut pz = 0.0; -- for bi in bodies.iter() { -- px += bi.vx * bi.mass; -- py += bi.vy * bi.mass; -- pz += bi.vz * bi.mass; -- } -- let sun = &mut bodies[0]; -- sun.vx = - px / SOLAR_MASS; -- 
sun.vy = - py / SOLAR_MASS; -- sun.vz = - pz / SOLAR_MASS; --} -- --fn main() { -- let n = std::env::args().nth(1).expect("need one arg").parse().unwrap(); -- let mut bodies = BODIES; -- -- offset_momentum(&mut bodies); -- println!("{:.9}", energy(&bodies)); -- -- advance(&mut bodies, 0.01, n); -- -- println!("{:.9}", energy(&bodies)); --} -- --/// Pop a mutable reference off the head of a slice, mutating the slice to no --/// longer contain the mutable reference. --fn shift_mut_ref<'a, T>(r: &mut &'a mut [T]) -> Option<&'a mut T> { -- if r.len() == 0 { return None } -- let tmp = std::mem::replace(r, &mut []); -- let (h, t) = tmp.split_at_mut(1); -- *r = t; -- Some(&mut h[0]) --} -diff --git a/third_party/rust/simd/examples/nbody.rs b/third_party/rust/simd/examples/nbody.rs -deleted file mode 100755 -index d6d4e88e3741..000000000000 ---- a/third_party/rust/simd/examples/nbody.rs -+++ /dev/null -@@ -1,170 +0,0 @@ --#![feature(cfg_target_feature)] -- --extern crate simd; -- --#[cfg(target_feature = "sse2")] --use simd::x86::sse2::*; --#[cfg(target_arch = "aarch64")] --use simd::aarch64::neon::*; -- --const PI: f64 = 3.141592653589793; --const SOLAR_MASS: f64 = 4.0 * PI * PI; --const DAYS_PER_YEAR: f64 = 365.24; -- --struct Body { -- x: [f64; 3], -- _fill: f64, -- v: [f64; 3], -- mass: f64, --} -- --impl Body { -- fn new(x0: f64, x1: f64, x2: f64, -- v0: f64, v1: f64, v2: f64, -- mass: f64) -> Body { -- Body { -- x: [x0, x1, x2], -- _fill: 0.0, -- v: [v0, v1, v2], -- mass: mass, -- } -- } --} -- --const N_BODIES: usize = 5; --const N: usize = N_BODIES * (N_BODIES - 1) / 2; --fn offset_momentum(bodies: &mut [Body; N_BODIES]) { -- let (sun, rest) = bodies.split_at_mut(1); -- let sun = &mut sun[0]; -- for body in rest { -- for k in 0..3 { -- sun.v[k] -= body.v[k] * body.mass / SOLAR_MASS; -- } -- } --} --fn advance(bodies: &mut [Body; N_BODIES], dt: f64) { -- let mut r = [[0.0; 4]; N]; -- let mut mag = [0.0; N]; -- -- let mut dx = [f64x2::splat(0.0); 3]; -- let mut 
dsquared; -- let mut distance; -- let mut dmag; -- -- let mut i = 0; -- for j in 0..N_BODIES { -- for k in j+1..N_BODIES { -- for m in 0..3 { -- r[i][m] = bodies[j].x[m] - bodies[k].x[m]; -- } -- i += 1; -- } -- } -- -- i = 0; -- while i < N { -- for m in 0..3 { -- dx[m] = f64x2::new(r[i][m], r[i+1][m]); -- } -- -- dsquared = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; -- distance = dsquared.to_f32().approx_rsqrt().to_f64(); -- for _ in 0..2 { -- distance = distance * f64x2::splat(1.5) - -- ((f64x2::splat(0.5) * dsquared) * distance) * (distance * distance) -- } -- dmag = f64x2::splat(dt) / dsquared * distance; -- dmag.store(&mut mag, i); -- -- i += 2; -- } -- -- i = 0; -- for j in 0..N_BODIES { -- for k in j+1..N_BODIES { -- for m in 0..3 { -- bodies[j].v[m] -= r[i][m] * bodies[k].mass * mag[i]; -- bodies[k].v[m] += r[i][m] * bodies[j].mass * mag[i]; -- } -- i += 1 -- } -- } -- for body in bodies { -- for m in 0..3 { -- body.x[m] += dt * body.v[m] -- } -- } --} -- --fn energy(bodies: &[Body; N_BODIES]) -> f64 { -- let mut e = 0.0; -- for i in 0..N_BODIES { -- let bi = &bodies[i]; -- e += bi.mass * (bi.v[0] * bi.v[0] + bi.v[1] * bi.v[1] + bi.v[2] * bi.v[2]) / 2.0; -- for j in i+1..N_BODIES { -- let bj = &bodies[j]; -- let mut dx = [0.0; 3]; -- for k in 0..3 { -- dx[k] = bi.x[k] - bj.x[k]; -- } -- let mut distance = 0.0; -- for &d in &dx { distance += d * d } -- e -= bi.mass * bj.mass / distance.sqrt() -- } -- } -- e --} -- --fn main() { -- let mut bodies: [Body; N_BODIES] = [ -- /* sun */ -- Body::new(0.0, 0.0, 0.0, -- 0.0, 0.0, 0.0, -- SOLAR_MASS), -- /* jupiter */ -- Body::new(4.84143144246472090e+00, -- -1.16032004402742839e+00, -- -1.03622044471123109e-01 , -- 1.66007664274403694e-03 * DAYS_PER_YEAR, -- 7.69901118419740425e-03 * DAYS_PER_YEAR, -- -6.90460016972063023e-05 * DAYS_PER_YEAR , -- 9.54791938424326609e-04 * SOLAR_MASS -- ), -- /* saturn */ -- Body::new(8.34336671824457987e+00, -- 4.12479856412430479e+00, -- -4.03523417114321381e-01 , -- 
-2.76742510726862411e-03 * DAYS_PER_YEAR, -- 4.99852801234917238e-03 * DAYS_PER_YEAR, -- 2.30417297573763929e-05 * DAYS_PER_YEAR , -- 2.85885980666130812e-04 * SOLAR_MASS -- ), -- /* uranus */ -- Body::new(1.28943695621391310e+01, -- -1.51111514016986312e+01, -- -2.23307578892655734e-01 , -- 2.96460137564761618e-03 * DAYS_PER_YEAR, -- 2.37847173959480950e-03 * DAYS_PER_YEAR, -- -2.96589568540237556e-05 * DAYS_PER_YEAR , -- 4.36624404335156298e-05 * SOLAR_MASS -- ), -- /* neptune */ -- Body::new(1.53796971148509165e+01, -- -2.59193146099879641e+01, -- 1.79258772950371181e-01 , -- 2.68067772490389322e-03 * DAYS_PER_YEAR, -- 1.62824170038242295e-03 * DAYS_PER_YEAR, -- -9.51592254519715870e-05 * DAYS_PER_YEAR , -- 5.15138902046611451e-05 * SOLAR_MASS -- ) -- ]; -- -- let n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap(); -- -- offset_momentum(&mut bodies); -- println!("{:.9}", energy(&bodies)); -- for _ in 0..n { -- advance(&mut bodies, 0.01); -- } -- println!("{:.9}", energy(&bodies)); --} -diff --git a/third_party/rust/simd/examples/ops.rs b/third_party/rust/simd/examples/ops.rs -deleted file mode 100644 -index f8c919101e3c..000000000000 ---- a/third_party/rust/simd/examples/ops.rs -+++ /dev/null -@@ -1,10 +0,0 @@ --extern crate simd; -- --use simd::*; -- --#[allow(unused_variables)] --fn main() { -- let x = i32x4::splat(1_i32); -- let y = -x; -- let z = !x; --} -diff --git a/third_party/rust/simd/examples/spectral-norm-nosimd.rs b/third_party/rust/simd/examples/spectral-norm-nosimd.rs -deleted file mode 100644 -index 919f9c61990f..000000000000 ---- a/third_party/rust/simd/examples/spectral-norm-nosimd.rs -+++ /dev/null -@@ -1,106 +0,0 @@ --// The Computer Language Benchmarks Game --// http://benchmarksgame.alioth.debian.org/ --// --// contributed by the Rust Project Developers --// contributed by TeXitoi -- --#![allow(non_snake_case)] -- --use std::iter::repeat; --//use std::thread; -- --// As std::simd::f64x2 is unstable, we provide a 
similar interface, --// expecting llvm to autovectorize its usage. --#[allow(non_camel_case_types)] --struct f64x2(f64, f64); --impl std::ops::Add for f64x2 { -- type Output = Self; -- fn add(self, rhs: Self) -> Self { -- f64x2(self.0 + rhs.0, self.1 + rhs.1) -- } --} --impl std::ops::Div for f64x2 { -- type Output = Self; -- fn div(self, rhs: Self) -> Self { -- f64x2(self.0 / rhs.0, self.1 / rhs.1) -- } --} -- --fn main() { -- let n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap(); -- let answer = spectralnorm(n); -- println!("{:.9}", answer); --} -- --fn spectralnorm(n: usize) -> f64 { -- assert!(n % 2 == 0, "only even lengths are accepted"); -- let mut u = repeat(1.0).take(n).collect::>(); -- let mut v = u.clone(); -- let mut tmp = v.clone(); -- for _ in 0..10 { -- mult_AtAv(&u, &mut v, &mut tmp); -- mult_AtAv(&v, &mut u, &mut tmp); -- } -- (dot(&u, &v) / dot(&v, &v)).sqrt() --} -- --fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) { -- mult_Av(v, tmp); -- mult_Atv(tmp, out); --} -- --fn mult_Av(v: &[f64], out: &mut [f64]) { -- parallel(out, |start, out| mult(v, out, start, |i, j| A(i, j))); --} -- --fn mult_Atv(v: &[f64], out: &mut [f64]) { -- parallel(out, |start, out| mult(v, out, start, |i, j| A(j, i))); --} -- --fn mult(v: &[f64], out: &mut [f64], start: usize, a: F) -- where F: Fn(usize, usize) -> f64 { -- for (i, slot) in out.iter_mut().enumerate().map(|(i, s)| (i + start, s)) { -- let mut sum = f64x2(0.0, 0.0); -- for (j, chunk) in v.chunks(2).enumerate().map(|(j, s)| (2 * j, s)) { -- let top = f64x2(chunk[0], chunk[1]); -- let bot = f64x2(a(i, j), a(i, j + 1)); -- sum = sum + top / bot; -- } -- let f64x2(a, b) = sum; -- *slot = a + b; -- } --} -- --fn A(i: usize, j: usize) -> f64 { -- ((i + j) * (i + j + 1) / 2 + i + 1) as f64 --} -- --fn dot(v: &[f64], u: &[f64]) -> f64 { -- v.iter().zip(u.iter()).map(|(a, b)| *a * *b).fold(0., |acc, i| acc + i) --} -- --//struct Racy(T); --//unsafe impl Send for Racy {} -- --// 
Executes a closure in parallel over the given mutable slice. The closure `f` --// is run in parallel and yielded the starting index within `v` as well as a --// sub-slice of `v`. --fn parallel<'a, T, F>(v: &mut [T], ref f: F) -- where T: 'static + Send + Sync, --F: Fn(usize, &mut [T]) + Sync --{ -- f(0, v); -- /*let size = v.len() / 4 + 1; -- let jhs = v.chunks_mut(size).enumerate().map(|(i, chunk)| { -- // Need to convert `f` and `chunk` to something that can cross the task -- // boundary. -- let f = Racy(f as *const F as *const usize); -- let raw = Racy((&mut chunk[0] as *mut T, chunk.len())); -- thread::spawn(move|| { -- let f = f.0 as *const F; -- let raw = raw.0; -- unsafe { (*f)(i * size, std::slice::from_raw_parts_mut(raw.0, raw.1)) } -- }) -- }).collect::>(); -- for jh in jhs { jh.join().unwrap(); }*/ --} -diff --git a/third_party/rust/simd/examples/spectral-norm.rs b/third_party/rust/simd/examples/spectral-norm.rs -deleted file mode 100755 -index 656f52e4fad0..000000000000 ---- a/third_party/rust/simd/examples/spectral-norm.rs -+++ /dev/null -@@ -1,74 +0,0 @@ --#![feature(cfg_target_feature)] --#![allow(non_snake_case)] -- --extern crate simd; -- --#[cfg(target_feature = "sse2")] --use simd::x86::sse2::f64x2; --#[cfg(target_arch = "aarch64")] --use simd::aarch64::neon::f64x2; -- --fn A(i: usize, j: usize) -> f64 { -- ((i + j) * (i + j + 1) / 2 + i + 1) as f64 --} -- --fn dot(x: &[f64], y: &[f64]) -> f64 { -- x.iter().zip(y).map(|(&x, &y)| x * y).fold(0.0, |a, b| a + b) --} -- --fn mult_Av(v: &[f64], out: &mut [f64]) { -- assert!(v.len() == out.len()); -- assert!(v.len() % 2 == 0); -- -- for i in 0..v.len() { -- let mut sum = f64x2::splat(0.0); -- -- let mut j = 0; -- while j < v.len() { -- let b = f64x2::load(v, j); -- let a = f64x2::new(A(i, j), A(i, j + 1)); -- sum = sum + b / a; -- j += 2 -- } -- out[i] = sum.extract(0) + sum.extract(1); -- } --} -- --fn mult_Atv(v: &[f64], out: &mut [f64]) { -- assert!(v.len() == out.len()); -- assert!(v.len() % 2 == 
0); -- -- for i in 0..v.len() { -- let mut sum = f64x2::splat(0.0); -- -- let mut j = 0; -- while j < v.len() { -- let b = f64x2::load(v, j); -- let a = f64x2::new(A(j, i), A(j + 1, i)); -- sum = sum + b / a; -- j += 2 -- } -- out[i] = sum.extract(0) + sum.extract(1); -- } --} -- --fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) { -- mult_Av(v, tmp); -- mult_Atv(tmp, out); --} -- --fn main() { -- let mut n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap(); -- if n % 2 == 1 { n += 1 } -- -- let mut u = vec![1.0; n]; -- let mut v = u.clone(); -- let mut tmp = u.clone(); -- -- for _ in 0..10 { -- mult_AtAv(&u, &mut v, &mut tmp); -- mult_AtAv(&v, &mut u, &mut tmp); -- } -- -- println!("{:.9}", (dot(&u, &v) / dot(&v, &v)).sqrt()); --} -diff --git a/third_party/rust/simd/src/aarch64/mod.rs b/third_party/rust/simd/src/aarch64/mod.rs -deleted file mode 100644 -index 5ba0a302b4d1..000000000000 ---- a/third_party/rust/simd/src/aarch64/mod.rs -+++ /dev/null -@@ -1,3 +0,0 @@ --//! Features specific to AArch64 CPUs. 
-- --pub mod neon; -diff --git a/third_party/rust/simd/src/aarch64/neon.rs b/third_party/rust/simd/src/aarch64/neon.rs -deleted file mode 100644 -index 0cca05a52788..000000000000 ---- a/third_party/rust/simd/src/aarch64/neon.rs -+++ /dev/null -@@ -1,681 +0,0 @@ --use super::super::*; --use {simd_cast, f32x2}; -- --pub use sixty_four::{f64x2, i64x2, u64x2, bool64ix2, bool64fx2}; --#[repr(simd)] --#[derive(Copy, Clone)] --pub struct u32x2(u32, u32); --#[repr(simd)] --#[derive(Copy, Clone)] --pub struct i32x2(i32, i32); -- --#[repr(simd)] --#[derive(Copy, Clone)] --pub struct u16x4(u16, u16, u16, u16); --#[repr(simd)] --#[derive(Copy, Clone)] --pub struct i16x4(i16, i16, i16, i16); --#[repr(simd)] --#[derive(Copy, Clone)] --pub struct u8x8(u8, u8, u8, u8, -- u8, u8, u8, u8); --#[repr(simd)] --#[derive(Copy, Clone)] --pub struct i8x8(i8, i8, i8, i8, -- i8, i8, i8, i8); -- --#[repr(simd)] --#[derive(Copy, Clone)] --pub struct i64x1(i64); --#[repr(simd)] --#[derive(Copy, Clone)] --pub struct u64x1(u64); --#[repr(simd)] --#[derive(Copy, Clone)] --pub struct f64x1(f64); -- --#[allow(dead_code)] --extern "platform-intrinsic" { -- fn aarch64_vhadd_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vhadd_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vhadd_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vhadd_u16(x: u16x4, y: u16x4) -> u16x4; -- fn aarch64_vhadd_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vhadd_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vhaddq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vhaddq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vhaddq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vhaddq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vhaddq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vhaddq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn aarch64_vrhadd_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vrhadd_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vrhadd_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vrhadd_u16(x: u16x4, y: u16x4) -> u16x4; -- fn 
aarch64_vrhadd_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vrhadd_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vrhaddq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vrhaddq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vrhaddq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vrhaddq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vrhaddq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vrhaddq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn aarch64_vqadd_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vqadd_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vqadd_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vqadd_u16(x: u16x4, y: u16x4) -> u16x4; -- fn aarch64_vqadd_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vqadd_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vqadd_s64(x: i64x1, y: i64x1) -> i64x1; -- fn aarch64_vqadd_u64(x: u64x1, y: u64x1) -> u64x1; -- fn aarch64_vqaddq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vqaddq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vqaddq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vqaddq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vqaddq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vqaddq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn aarch64_vqaddq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn aarch64_vqaddq_u64(x: u64x2, y: u64x2) -> u64x2; -- fn aarch64_vuqadd_s8(x: i8x16, y: u8x16) -> i8x16; -- fn aarch64_vuqadd_s16(x: i16x8, y: u16x8) -> i16x8; -- fn aarch64_vuqadd_s32(x: i32x4, y: u32x4) -> i32x4; -- fn aarch64_vuqadd_s64(x: i64x2, y: u64x2) -> i64x2; -- fn aarch64_vsqadd_u8(x: u8x16, y: i8x16) -> u8x16; -- fn aarch64_vsqadd_u16(x: u16x8, y: i16x8) -> u16x8; -- fn aarch64_vsqadd_u32(x: u32x4, y: i32x4) -> u32x4; -- fn aarch64_vsqadd_u64(x: u64x2, y: i64x2) -> u64x2; -- fn aarch64_vraddhn_s16(x: i16x8, y: i16x8) -> i8x8; -- fn aarch64_vraddhn_u16(x: u16x8, y: u16x8) -> u8x8; -- fn aarch64_vraddhn_s32(x: i32x4, y: i32x4) -> i16x4; -- fn aarch64_vraddhn_u32(x: u32x4, y: u32x4) -> u16x4; -- fn aarch64_vraddhn_s64(x: i64x2, y: 
i64x2) -> i32x2; -- fn aarch64_vraddhn_u64(x: u64x2, y: u64x2) -> u32x2; -- fn aarch64_vfmulx_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vfmulx_f64(x: f64x1, y: f64x1) -> f64x1; -- fn aarch64_vfmulxq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vfmulxq_f64(x: f64x2, y: f64x2) -> f64x2; -- fn aarch64_vfma_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vfma_f64(x: f64x1, y: f64x1) -> f64x1; -- fn aarch64_vfmaq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vfmaq_f64(x: f64x2, y: f64x2) -> f64x2; -- fn aarch64_vqdmulh_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vqdmulh_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vqdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vqdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vqrdmulh_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vqrdmulh_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vqrdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vqrdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vmull_s8(x: i8x8, y: i8x8) -> i16x8; -- fn aarch64_vmull_u8(x: u8x8, y: u8x8) -> u16x8; -- fn aarch64_vmull_s16(x: i16x4, y: i16x4) -> i32x4; -- fn aarch64_vmull_u16(x: u16x4, y: u16x4) -> u32x4; -- fn aarch64_vmull_s32(x: i32x2, y: i32x2) -> i64x2; -- fn aarch64_vmull_u32(x: u32x2, y: u32x2) -> u64x2; -- fn aarch64_vqdmullq_s8(x: i8x8, y: i8x8) -> i16x8; -- fn aarch64_vqdmullq_s16(x: i16x4, y: i16x4) -> i32x4; -- fn aarch64_vhsub_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vhsub_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vhsub_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vhsub_u16(x: u16x4, y: u16x4) -> u16x4; -- fn aarch64_vhsub_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vhsub_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vhsubq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vhsubq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vhsubq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vhsubq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vhsubq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn 
aarch64_vhsubq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn aarch64_vqsub_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vqsub_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vqsub_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vqsub_u16(x: u16x4, y: u16x4) -> u16x4; -- fn aarch64_vqsub_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vqsub_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vqsub_s64(x: i64x1, y: i64x1) -> i64x1; -- fn aarch64_vqsub_u64(x: u64x1, y: u64x1) -> u64x1; -- fn aarch64_vqsubq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vqsubq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vqsubq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vqsubq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vqsubq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vqsubq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn aarch64_vqsubq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn aarch64_vqsubq_u64(x: u64x2, y: u64x2) -> u64x2; -- fn aarch64_vrsubhn_s16(x: i16x8, y: i16x8) -> i8x8; -- fn aarch64_vrsubhn_u16(x: u16x8, y: u16x8) -> u8x8; -- fn aarch64_vrsubhn_s32(x: i32x4, y: i32x4) -> i16x4; -- fn aarch64_vrsubhn_u32(x: u32x4, y: u32x4) -> u16x4; -- fn aarch64_vrsubhn_s64(x: i64x2, y: i64x2) -> i32x2; -- fn aarch64_vrsubhn_u64(x: u64x2, y: u64x2) -> u32x2; -- fn aarch64_vabd_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vabd_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vabd_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vabd_u16(x: u16x4, y: u16x4) -> u16x4; -- fn aarch64_vabd_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vabd_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vabd_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vabd_f64(x: f64x1, y: f64x1) -> f64x1; -- fn aarch64_vabdq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vabdq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vabdq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vabdq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vabdq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vabdq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn 
aarch64_vabdq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vabdq_f64(x: f64x2, y: f64x2) -> f64x2; -- fn aarch64_vmax_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vmax_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vmax_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vmax_u16(x: u16x4, y: u16x4) -> u16x4; -- fn aarch64_vmax_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vmax_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vmax_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vmax_f64(x: f64x1, y: f64x1) -> f64x1; -- fn aarch64_vmaxq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vmaxq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vmaxq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vmaxq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vmaxq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vmaxq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn aarch64_vmaxq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vmaxq_f64(x: f64x2, y: f64x2) -> f64x2; -- fn aarch64_vmin_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vmin_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vmin_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vmin_u16(x: u16x4, y: u16x4) -> u16x4; -- fn aarch64_vmin_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vmin_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vmin_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vmin_f64(x: f64x1, y: f64x1) -> f64x1; -- fn aarch64_vminq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vminq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vminq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vminq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vminq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vminq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn aarch64_vminq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vminq_f64(x: f64x2, y: f64x2) -> f64x2; -- fn aarch64_vmaxnm_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vmaxnm_f64(x: f64x1, y: f64x1) -> f64x1; -- fn aarch64_vmaxnmq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vmaxnmq_f64(x: f64x2, y: 
f64x2) -> f64x2; -- fn aarch64_vminnm_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vminnm_f64(x: f64x1, y: f64x1) -> f64x1; -- fn aarch64_vminnmq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vminnmq_f64(x: f64x2, y: f64x2) -> f64x2; -- fn aarch64_vshl_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vshl_u8(x: u8x8, y: i8x8) -> u8x8; -- fn aarch64_vshl_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vshl_u16(x: u16x4, y: i16x4) -> u16x4; -- fn aarch64_vshl_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vshl_u32(x: u32x2, y: i32x2) -> u32x2; -- fn aarch64_vshl_s64(x: i64x1, y: i64x1) -> i64x1; -- fn aarch64_vshl_u64(x: u64x1, y: i64x1) -> u64x1; -- fn aarch64_vshlq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vshlq_u8(x: u8x16, y: i8x16) -> u8x16; -- fn aarch64_vshlq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vshlq_u16(x: u16x8, y: i16x8) -> u16x8; -- fn aarch64_vshlq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vshlq_u32(x: u32x4, y: i32x4) -> u32x4; -- fn aarch64_vshlq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn aarch64_vshlq_u64(x: u64x2, y: i64x2) -> u64x2; -- fn aarch64_vqshl_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vqshl_u8(x: u8x8, y: i8x8) -> u8x8; -- fn aarch64_vqshl_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vqshl_u16(x: u16x4, y: i16x4) -> u16x4; -- fn aarch64_vqshl_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vqshl_u32(x: u32x2, y: i32x2) -> u32x2; -- fn aarch64_vqshl_s64(x: i64x1, y: i64x1) -> i64x1; -- fn aarch64_vqshl_u64(x: u64x1, y: i64x1) -> u64x1; -- fn aarch64_vqshlq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vqshlq_u8(x: u8x16, y: i8x16) -> u8x16; -- fn aarch64_vqshlq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vqshlq_u16(x: u16x8, y: i16x8) -> u16x8; -- fn aarch64_vqshlq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vqshlq_u32(x: u32x4, y: i32x4) -> u32x4; -- fn aarch64_vqshlq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn aarch64_vqshlq_u64(x: u64x2, y: i64x2) -> u64x2; -- fn aarch64_vrshl_s8(x: i8x8, y: i8x8) -> i8x8; -- 
fn aarch64_vrshl_u8(x: u8x8, y: i8x8) -> u8x8; -- fn aarch64_vrshl_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vrshl_u16(x: u16x4, y: i16x4) -> u16x4; -- fn aarch64_vrshl_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vrshl_u32(x: u32x2, y: i32x2) -> u32x2; -- fn aarch64_vrshl_s64(x: i64x1, y: i64x1) -> i64x1; -- fn aarch64_vrshl_u64(x: u64x1, y: i64x1) -> u64x1; -- fn aarch64_vrshlq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vrshlq_u8(x: u8x16, y: i8x16) -> u8x16; -- fn aarch64_vrshlq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vrshlq_u16(x: u16x8, y: i16x8) -> u16x8; -- fn aarch64_vrshlq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vrshlq_u32(x: u32x4, y: i32x4) -> u32x4; -- fn aarch64_vrshlq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn aarch64_vrshlq_u64(x: u64x2, y: i64x2) -> u64x2; -- fn aarch64_vqrshl_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vqrshl_u8(x: u8x8, y: i8x8) -> u8x8; -- fn aarch64_vqrshl_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vqrshl_u16(x: u16x4, y: i16x4) -> u16x4; -- fn aarch64_vqrshl_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vqrshl_u32(x: u32x2, y: i32x2) -> u32x2; -- fn aarch64_vqrshl_s64(x: i64x1, y: i64x1) -> i64x1; -- fn aarch64_vqrshl_u64(x: u64x1, y: i64x1) -> u64x1; -- fn aarch64_vqrshlq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vqrshlq_u8(x: u8x16, y: i8x16) -> u8x16; -- fn aarch64_vqrshlq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vqrshlq_u16(x: u16x8, y: i16x8) -> u16x8; -- fn aarch64_vqrshlq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vqrshlq_u32(x: u32x4, y: i32x4) -> u32x4; -- fn aarch64_vqrshlq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn aarch64_vqrshlq_u64(x: u64x2, y: i64x2) -> u64x2; -- fn aarch64_vqshrun_n_s16(x: i16x8, y: u32) -> i8x8; -- fn aarch64_vqshrun_n_s32(x: i32x4, y: u32) -> i16x4; -- fn aarch64_vqshrun_n_s64(x: i64x2, y: u32) -> i32x2; -- fn aarch64_vqrshrun_n_s16(x: i16x8, y: u32) -> i8x8; -- fn aarch64_vqrshrun_n_s32(x: i32x4, y: u32) -> i16x4; -- fn aarch64_vqrshrun_n_s64(x: 
i64x2, y: u32) -> i32x2; -- fn aarch64_vqshrn_n_s16(x: i16x8, y: u32) -> i8x8; -- fn aarch64_vqshrn_n_u16(x: u16x8, y: u32) -> u8x8; -- fn aarch64_vqshrn_n_s32(x: i32x4, y: u32) -> i16x4; -- fn aarch64_vqshrn_n_u32(x: u32x4, y: u32) -> u16x4; -- fn aarch64_vqshrn_n_s64(x: i64x2, y: u32) -> i32x2; -- fn aarch64_vqshrn_n_u64(x: u64x2, y: u32) -> u32x2; -- fn aarch64_vrshrn_n_s16(x: i16x8, y: u32) -> i8x8; -- fn aarch64_vrshrn_n_u16(x: u16x8, y: u32) -> u8x8; -- fn aarch64_vrshrn_n_s32(x: i32x4, y: u32) -> i16x4; -- fn aarch64_vrshrn_n_u32(x: u32x4, y: u32) -> u16x4; -- fn aarch64_vrshrn_n_s64(x: i64x2, y: u32) -> i32x2; -- fn aarch64_vrshrn_n_u64(x: u64x2, y: u32) -> u32x2; -- fn aarch64_vqrshrn_n_s16(x: i16x8, y: u32) -> i8x8; -- fn aarch64_vqrshrn_n_u16(x: u16x8, y: u32) -> u8x8; -- fn aarch64_vqrshrn_n_s32(x: i32x4, y: u32) -> i16x4; -- fn aarch64_vqrshrn_n_u32(x: u32x4, y: u32) -> u16x4; -- fn aarch64_vqrshrn_n_s64(x: i64x2, y: u32) -> i32x2; -- fn aarch64_vqrshrn_n_u64(x: u64x2, y: u32) -> u32x2; -- fn aarch64_vsri_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vsri_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vsri_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vsri_u16(x: u16x4, y: u16x4) -> u16x4; -- fn aarch64_vsri_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vsri_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vsri_s64(x: i64x1, y: i64x1) -> i64x1; -- fn aarch64_vsri_u64(x: u64x1, y: u64x1) -> u64x1; -- fn aarch64_vsriq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vsriq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vsriq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vsriq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vsriq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vsriq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn aarch64_vsriq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn aarch64_vsriq_u64(x: u64x2, y: u64x2) -> u64x2; -- fn aarch64_vsli_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vsli_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vsli_s16(x: i16x4, y: 
i16x4) -> i16x4; -- fn aarch64_vsli_u16(x: u16x4, y: u16x4) -> u16x4; -- fn aarch64_vsli_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vsli_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vsli_s64(x: i64x1, y: i64x1) -> i64x1; -- fn aarch64_vsli_u64(x: u64x1, y: u64x1) -> u64x1; -- fn aarch64_vsliq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vsliq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vsliq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vsliq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vsliq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vsliq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn aarch64_vsliq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn aarch64_vsliq_u64(x: u64x2, y: u64x2) -> u64x2; -- fn aarch64_vvqmovn_s16(x: i16x8) -> i8x8; -- fn aarch64_vvqmovn_u16(x: u16x8) -> u8x8; -- fn aarch64_vvqmovn_s32(x: i32x4) -> i16x4; -- fn aarch64_vvqmovn_u32(x: u32x4) -> u16x4; -- fn aarch64_vvqmovn_s64(x: i64x2) -> i32x2; -- fn aarch64_vvqmovn_u64(x: u64x2) -> u32x2; -- fn aarch64_vabs_s8(x: i8x8) -> i8x8; -- fn aarch64_vabs_s16(x: i16x4) -> i16x4; -- fn aarch64_vabs_s32(x: i32x2) -> i32x2; -- fn aarch64_vabs_s64(x: i64x1) -> i64x1; -- fn aarch64_vabsq_s8(x: i8x16) -> i8x16; -- fn aarch64_vabsq_s16(x: i16x8) -> i16x8; -- fn aarch64_vabsq_s32(x: i32x4) -> i32x4; -- fn aarch64_vabsq_s64(x: i64x2) -> i64x2; -- fn aarch64_vabs_f32(x: f32x2) -> f32x2; -- fn aarch64_vabs_f64(x: f64x1) -> f64x1; -- fn aarch64_vabsq_f32(x: f32x4) -> f32x4; -- fn aarch64_vabsq_f64(x: f64x2) -> f64x2; -- fn aarch64_vqabs_s8(x: i8x8) -> i8x8; -- fn aarch64_vqabs_s16(x: i16x4) -> i16x4; -- fn aarch64_vqabs_s32(x: i32x2) -> i32x2; -- fn aarch64_vqabs_s64(x: i64x1) -> i64x1; -- fn aarch64_vqabsq_s8(x: i8x16) -> i8x16; -- fn aarch64_vqabsq_s16(x: i16x8) -> i16x8; -- fn aarch64_vqabsq_s32(x: i32x4) -> i32x4; -- fn aarch64_vqabsq_s64(x: i64x2) -> i64x2; -- fn aarch64_vqneg_s8(x: i8x8) -> i8x8; -- fn aarch64_vqneg_s16(x: i16x4) -> i16x4; -- fn aarch64_vqneg_s32(x: i32x2) -> i32x2; -- fn 
aarch64_vqneg_s64(x: i64x1) -> i64x1; -- fn aarch64_vqnegq_s8(x: i8x16) -> i8x16; -- fn aarch64_vqnegq_s16(x: i16x8) -> i16x8; -- fn aarch64_vqnegq_s32(x: i32x4) -> i32x4; -- fn aarch64_vqnegq_s64(x: i64x2) -> i64x2; -- fn aarch64_vclz_s8(x: i8x8) -> i8x8; -- fn aarch64_vclz_u8(x: u8x8) -> u8x8; -- fn aarch64_vclz_s16(x: i16x4) -> i16x4; -- fn aarch64_vclz_u16(x: u16x4) -> u16x4; -- fn aarch64_vclz_s32(x: i32x2) -> i32x2; -- fn aarch64_vclz_u32(x: u32x2) -> u32x2; -- fn aarch64_vclzq_s8(x: i8x16) -> i8x16; -- fn aarch64_vclzq_u8(x: u8x16) -> u8x16; -- fn aarch64_vclzq_s16(x: i16x8) -> i16x8; -- fn aarch64_vclzq_u16(x: u16x8) -> u16x8; -- fn aarch64_vclzq_s32(x: i32x4) -> i32x4; -- fn aarch64_vclzq_u32(x: u32x4) -> u32x4; -- fn aarch64_vcls_s8(x: i8x8) -> i8x8; -- fn aarch64_vcls_u8(x: u8x8) -> u8x8; -- fn aarch64_vcls_s16(x: i16x4) -> i16x4; -- fn aarch64_vcls_u16(x: u16x4) -> u16x4; -- fn aarch64_vcls_s32(x: i32x2) -> i32x2; -- fn aarch64_vcls_u32(x: u32x2) -> u32x2; -- fn aarch64_vclsq_s8(x: i8x16) -> i8x16; -- fn aarch64_vclsq_u8(x: u8x16) -> u8x16; -- fn aarch64_vclsq_s16(x: i16x8) -> i16x8; -- fn aarch64_vclsq_u16(x: u16x8) -> u16x8; -- fn aarch64_vclsq_s32(x: i32x4) -> i32x4; -- fn aarch64_vclsq_u32(x: u32x4) -> u32x4; -- fn aarch64_vcnt_s8(x: i8x8) -> i8x8; -- fn aarch64_vcnt_u8(x: u8x8) -> u8x8; -- fn aarch64_vcntq_s8(x: i8x16) -> i8x16; -- fn aarch64_vcntq_u8(x: u8x16) -> u8x16; -- fn aarch64_vrecpe_u32(x: u32x2) -> u32x2; -- fn aarch64_vrecpe_f32(x: f32x2) -> f32x2; -- fn aarch64_vrecpe_f64(x: f64x1) -> f64x1; -- fn aarch64_vrecpeq_u32(x: u32x4) -> u32x4; -- fn aarch64_vrecpeq_f32(x: f32x4) -> f32x4; -- fn aarch64_vrecpeq_f64(x: f64x2) -> f64x2; -- fn aarch64_vrecps_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vrecps_f64(x: f64x1, y: f64x1) -> f64x1; -- fn aarch64_vrecpsq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vrecpsq_f64(x: f64x2, y: f64x2) -> f64x2; -- fn aarch64_vsqrt_f32(x: f32x2) -> f32x2; -- fn aarch64_vsqrt_f64(x: f64x1) -> f64x1; -- fn 
aarch64_vsqrtq_f32(x: f32x4) -> f32x4; -- fn aarch64_vsqrtq_f64(x: f64x2) -> f64x2; -- fn aarch64_vrsqrte_u32(x: u32x2) -> u32x2; -- fn aarch64_vrsqrte_f32(x: f32x2) -> f32x2; -- fn aarch64_vrsqrte_f64(x: f64x1) -> f64x1; -- fn aarch64_vrsqrteq_u32(x: u32x4) -> u32x4; -- fn aarch64_vrsqrteq_f32(x: f32x4) -> f32x4; -- fn aarch64_vrsqrteq_f64(x: f64x2) -> f64x2; -- fn aarch64_vrsqrts_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vrsqrts_f64(x: f64x1, y: f64x1) -> f64x1; -- fn aarch64_vrsqrtsq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vrsqrtsq_f64(x: f64x2, y: f64x2) -> f64x2; -- fn aarch64_vrbit_s8(x: i8x8) -> i8x8; -- fn aarch64_vrbit_u8(x: u8x8) -> u8x8; -- fn aarch64_vrbitq_s8(x: i8x16) -> i8x16; -- fn aarch64_vrbitq_u8(x: u8x16) -> u8x16; -- fn aarch64_vpadd_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vpadd_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vpadd_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vpadd_u16(x: u16x4, y: u16x4) -> u16x4; -- fn aarch64_vpadd_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vpadd_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vpadd_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vpaddq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vpaddq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vpaddq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vpaddq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vpaddq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vpaddq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn aarch64_vpaddq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vpaddq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn aarch64_vpaddq_u64(x: u64x2, y: u64x2) -> u64x2; -- fn aarch64_vpaddq_f64(x: f64x2, y: f64x2) -> f64x2; -- fn aarch64_vpaddl_s16(x: i8x8) -> i16x4; -- fn aarch64_vpaddl_u16(x: u8x8) -> u16x4; -- fn aarch64_vpaddl_s32(x: i16x4) -> i32x2; -- fn aarch64_vpaddl_u32(x: u16x4) -> u32x2; -- fn aarch64_vpaddl_s64(x: i32x2) -> i64x1; -- fn aarch64_vpaddl_u64(x: u32x2) -> u64x1; -- fn aarch64_vpaddlq_s16(x: i8x16) -> i16x8; -- fn 
aarch64_vpaddlq_u16(x: u8x16) -> u16x8; -- fn aarch64_vpaddlq_s32(x: i16x8) -> i32x4; -- fn aarch64_vpaddlq_u32(x: u16x8) -> u32x4; -- fn aarch64_vpaddlq_s64(x: i32x4) -> i64x2; -- fn aarch64_vpaddlq_u64(x: u32x4) -> u64x2; -- fn aarch64_vpmax_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vpmax_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vpmax_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vpmax_u16(x: u16x4, y: u16x4) -> u16x4; -- fn aarch64_vpmax_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vpmax_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vpmax_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vpmaxq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vpmaxq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vpmaxq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vpmaxq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vpmaxq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vpmaxq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn aarch64_vpmaxq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vpmaxq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn aarch64_vpmaxq_u64(x: u64x2, y: u64x2) -> u64x2; -- fn aarch64_vpmaxq_f64(x: f64x2, y: f64x2) -> f64x2; -- fn aarch64_vpmin_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vpmin_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vpmin_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vpmin_u16(x: u16x4, y: u16x4) -> u16x4; -- fn aarch64_vpmin_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vpmin_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vpmin_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vpminq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vpminq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vpminq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vpminq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vpminq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vpminq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn aarch64_vpminq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vpminq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn aarch64_vpminq_u64(x: u64x2, y: u64x2) -> 
u64x2; -- fn aarch64_vpminq_f64(x: f64x2, y: f64x2) -> f64x2; -- fn aarch64_vpmaxnm_s8(x: i8x8, y: i8x8) -> i8x8; -- fn aarch64_vpmaxnm_u8(x: u8x8, y: u8x8) -> u8x8; -- fn aarch64_vpmaxnm_s16(x: i16x4, y: i16x4) -> i16x4; -- fn aarch64_vpmaxnm_u16(x: u16x4, y: u16x4) -> u16x4; -- fn aarch64_vpmaxnm_s32(x: i32x2, y: i32x2) -> i32x2; -- fn aarch64_vpmaxnm_u32(x: u32x2, y: u32x2) -> u32x2; -- fn aarch64_vpmaxnm_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vpmaxnmq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn aarch64_vpmaxnmq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vpmaxnmq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn aarch64_vpmaxnmq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn aarch64_vpmaxnmq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn aarch64_vpmaxnmq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn aarch64_vpmaxnmq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vpmaxnmq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn aarch64_vpmaxnmq_u64(x: u64x2, y: u64x2) -> u64x2; -- fn aarch64_vpmaxnmq_f64(x: f64x2, y: f64x2) -> f64x2; -- fn aarch64_vpminnm_f32(x: f32x2, y: f32x2) -> f32x2; -- fn aarch64_vpminnmq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn aarch64_vpminnmq_f64(x: f64x2, y: f64x2) -> f64x2; -- fn aarch64_vaddv_s8(x: i8x8) -> i8; -- fn aarch64_vaddv_u8(x: u8x8) -> u8; -- fn aarch64_vaddv_s16(x: i16x4) -> i16; -- fn aarch64_vaddv_u16(x: u16x4) -> u16; -- fn aarch64_vaddv_s32(x: i32x2) -> i32; -- fn aarch64_vaddv_u32(x: u32x2) -> u32; -- fn aarch64_vaddv_f32(x: f32x2) -> f32; -- fn aarch64_vaddvq_s8(x: i8x16) -> i8; -- fn aarch64_vaddvq_u8(x: u8x16) -> u8; -- fn aarch64_vaddvq_s16(x: i16x8) -> i16; -- fn aarch64_vaddvq_u16(x: u16x8) -> u16; -- fn aarch64_vaddvq_s32(x: i32x4) -> i32; -- fn aarch64_vaddvq_u32(x: u32x4) -> u32; -- fn aarch64_vaddvq_f32(x: f32x4) -> f32; -- fn aarch64_vaddvq_s64(x: i64x2) -> i64; -- fn aarch64_vaddvq_u64(x: u64x2) -> u64; -- fn aarch64_vaddvq_f64(x: f64x2) -> f64; -- fn aarch64_vaddlv_s8(x: i8x8) -> i16; -- fn aarch64_vaddlv_u8(x: u8x8) -> u16; -- fn 
aarch64_vaddlv_s16(x: i16x4) -> i32; -- fn aarch64_vaddlv_u16(x: u16x4) -> u32; -- fn aarch64_vaddlv_s32(x: i32x2) -> i64; -- fn aarch64_vaddlv_u32(x: u32x2) -> u64; -- fn aarch64_vaddlvq_s8(x: i8x16) -> i16; -- fn aarch64_vaddlvq_u8(x: u8x16) -> u16; -- fn aarch64_vaddlvq_s16(x: i16x8) -> i32; -- fn aarch64_vaddlvq_u16(x: u16x8) -> u32; -- fn aarch64_vaddlvq_s32(x: i32x4) -> i64; -- fn aarch64_vaddlvq_u32(x: u32x4) -> u64; -- fn aarch64_vmaxv_s8(x: i8x8) -> i8; -- fn aarch64_vmaxv_u8(x: u8x8) -> u8; -- fn aarch64_vmaxv_s16(x: i16x4) -> i16; -- fn aarch64_vmaxv_u16(x: u16x4) -> u16; -- fn aarch64_vmaxv_s32(x: i32x2) -> i32; -- fn aarch64_vmaxv_u32(x: u32x2) -> u32; -- fn aarch64_vmaxv_f32(x: f32x2) -> f32; -- fn aarch64_vmaxvq_s8(x: i8x16) -> i8; -- fn aarch64_vmaxvq_u8(x: u8x16) -> u8; -- fn aarch64_vmaxvq_s16(x: i16x8) -> i16; -- fn aarch64_vmaxvq_u16(x: u16x8) -> u16; -- fn aarch64_vmaxvq_s32(x: i32x4) -> i32; -- fn aarch64_vmaxvq_u32(x: u32x4) -> u32; -- fn aarch64_vmaxvq_f32(x: f32x4) -> f32; -- fn aarch64_vmaxvq_f64(x: f64x2) -> f64; -- fn aarch64_vminv_s8(x: i8x8) -> i8; -- fn aarch64_vminv_u8(x: u8x8) -> u8; -- fn aarch64_vminv_s16(x: i16x4) -> i16; -- fn aarch64_vminv_u16(x: u16x4) -> u16; -- fn aarch64_vminv_s32(x: i32x2) -> i32; -- fn aarch64_vminv_u32(x: u32x2) -> u32; -- fn aarch64_vminv_f32(x: f32x2) -> f32; -- fn aarch64_vminvq_s8(x: i8x16) -> i8; -- fn aarch64_vminvq_u8(x: u8x16) -> u8; -- fn aarch64_vminvq_s16(x: i16x8) -> i16; -- fn aarch64_vminvq_u16(x: u16x8) -> u16; -- fn aarch64_vminvq_s32(x: i32x4) -> i32; -- fn aarch64_vminvq_u32(x: u32x4) -> u32; -- fn aarch64_vminvq_f32(x: f32x4) -> f32; -- fn aarch64_vminvq_f64(x: f64x2) -> f64; -- fn aarch64_vmaxnmv_f32(x: f32x2) -> f32; -- fn aarch64_vmaxnmvq_f32(x: f32x4) -> f32; -- fn aarch64_vmaxnmvq_f64(x: f64x2) -> f64; -- fn aarch64_vminnmv_f32(x: f32x2) -> f32; -- fn aarch64_vminnmvq_f32(x: f32x4) -> f32; -- fn aarch64_vminnmvq_f64(x: f64x2) -> f64; -- fn aarch64_vqtbl1_s8(x: i8x16, y: u8x8) -> 
i8x8; -- fn aarch64_vqtbl1_u8(x: u8x16, y: u8x8) -> u8x8; -- fn aarch64_vqtbl1q_s8(x: i8x16, y: u8x16) -> i8x16; -- fn aarch64_vqtbl1q_u8(x: u8x16, y: u8x16) -> u8x16; -- fn aarch64_vqtbx1_s8(x: i8x8, y: i8x16, z: u8x8) -> i8x8; -- fn aarch64_vqtbx1_u8(x: u8x8, y: u8x16, z: u8x8) -> u8x8; -- fn aarch64_vqtbx1q_s8(x: i8x16, y: i8x16, z: u8x16) -> i8x16; -- fn aarch64_vqtbx1q_u8(x: u8x16, y: u8x16, z: u8x16) -> u8x16; -- fn aarch64_vqtbl2_s8(x: (i8x16, i8x16), y: u8x8) -> i8x8; -- fn aarch64_vqtbl2_u8(x: (u8x16, u8x16), y: u8x8) -> u8x8; -- fn aarch64_vqtbl2q_s8(x: (i8x16, i8x16), y: u8x16) -> i8x16; -- fn aarch64_vqtbl2q_u8(x: (u8x16, u8x16), y: u8x16) -> u8x16; -- fn aarch64_vqtbx2_s8(x: (i8x16, i8x16), y: u8x8) -> i8x8; -- fn aarch64_vqtbx2_u8(x: (u8x16, u8x16), y: u8x8) -> u8x8; -- fn aarch64_vqtbx2q_s8(x: (i8x16, i8x16), y: u8x16) -> i8x16; -- fn aarch64_vqtbx2q_u8(x: (u8x16, u8x16), y: u8x16) -> u8x16; -- fn aarch64_vqtbl3_s8(x: (i8x16, i8x16, i8x16), y: u8x8) -> i8x8; -- fn aarch64_vqtbl3_u8(x: (u8x16, u8x16, u8x16), y: u8x8) -> u8x8; -- fn aarch64_vqtbl3q_s8(x: (i8x16, i8x16, i8x16), y: u8x16) -> i8x16; -- fn aarch64_vqtbl3q_u8(x: (u8x16, u8x16, u8x16), y: u8x16) -> u8x16; -- fn aarch64_vqtbx3_s8(x: i8x8, y: (i8x16, i8x16, i8x16), z: u8x8) -> i8x8; -- fn aarch64_vqtbx3_u8(x: u8x8, y: (u8x16, u8x16, u8x16), z: u8x8) -> u8x8; -- fn aarch64_vqtbx3q_s8(x: i8x16, y: (i8x16, i8x16, i8x16), z: u8x16) -> i8x16; -- fn aarch64_vqtbx3q_u8(x: u8x16, y: (u8x16, u8x16, u8x16), z: u8x16) -> u8x16; -- fn aarch64_vqtbl4_s8(x: (i8x16, i8x16, i8x16, i8x16), y: u8x8) -> i8x8; -- fn aarch64_vqtbl4_u8(x: (u8x16, u8x16, u8x16, u8x16), y: u8x8) -> u8x8; -- fn aarch64_vqtbl4q_s8(x: (i8x16, i8x16, i8x16, i8x16), y: u8x16) -> i8x16; -- fn aarch64_vqtbl4q_u8(x: (u8x16, u8x16, u8x16, u8x16), y: u8x16) -> u8x16; -- fn aarch64_vqtbx4_s8(x: i8x8, y: (i8x16, i8x16, i8x16, i8x16), z: u8x8) -> i8x8; -- fn aarch64_vqtbx4_u8(x: u8x8, y: (u8x16, u8x16, u8x16, u8x16), z: u8x8) -> u8x8; -- fn 
aarch64_vqtbx4q_s8(x: i8x16, y: (i8x16, i8x16, i8x16, i8x16), z: u8x16) -> i8x16; -- fn aarch64_vqtbx4q_u8(x: u8x16, y: (u8x16, u8x16, u8x16, u8x16), z: u8x16) -> u8x16; --} -- --pub trait Aarch64F32x4 { -- fn to_f64(self) -> f64x2; --} --impl Aarch64F32x4 for f32x4 { -- #[inline] -- fn to_f64(self) -> f64x2 { -- unsafe { -- simd_cast(f32x2(self.0, self.1)) -- } -- } --} -- --pub trait Aarch64U8x16 { -- fn table_lookup_1(self, t0: u8x16) -> u8x16; --} --impl Aarch64U8x16 for u8x16 { -- #[inline] -- fn table_lookup_1(self, t0: u8x16) -> u8x16 { -- unsafe {aarch64_vqtbl1q_u8(t0, self)} -- } --} --pub trait Aarch64I8x16 { -- fn table_lookup_1(self, t0: i8x16) -> i8x16; --} --impl Aarch64I8x16 for i8x16 { -- #[inline] -- fn table_lookup_1(self, t0: i8x16) -> i8x16 { -- unsafe {aarch64_vqtbl2q_s8((t0, t0), ::bitcast(self))} -- } --} -- --#[doc(hidden)] --pub mod common { -- use super::super::super::*; -- use core::mem; -- -- #[inline] -- pub fn f32x4_sqrt(x: f32x4) -> f32x4 { -- unsafe {super::aarch64_vsqrtq_f32(x)} -- } -- #[inline] -- pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 { -- unsafe {super::aarch64_vrsqrteq_f32(x)} -- } -- #[inline] -- pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 { -- unsafe {super::aarch64_vrecpeq_f32(x)} -- } -- #[inline] -- pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 { -- unsafe {super::aarch64_vmaxq_f32(x, y)} -- } -- #[inline] -- pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 { -- unsafe {super::aarch64_vminq_f32(x, y)} -- } -- -- macro_rules! bools { -- ($($ty: ty, $all: ident ($min: ident), $any: ident ($max: ident);)*) => { -- $( -- #[inline] -- pub fn $all(x: $ty) -> bool { -- unsafe { -- super::$min(mem::transmute(x)) != 0 -- } -- } -- #[inline] -- pub fn $any(x: $ty) -> bool { -- unsafe { -- super::$max(mem::transmute(x)) != 0 -- } -- } -- )* -- } -- } -- -- bools! 
{ -- bool32fx4, bool32fx4_all(aarch64_vminvq_u32), bool32fx4_any(aarch64_vmaxvq_u32); -- bool8ix16, bool8ix16_all(aarch64_vminvq_u8), bool8ix16_any(aarch64_vmaxvq_u8); -- bool16ix8, bool16ix8_all(aarch64_vminvq_u16), bool16ix8_any(aarch64_vmaxvq_u16); -- bool32ix4, bool32ix4_all(aarch64_vminvq_u32), bool32ix4_any(aarch64_vmaxvq_u32); -- } --} -diff --git a/third_party/rust/simd/src/arm/mod.rs b/third_party/rust/simd/src/arm/mod.rs -deleted file mode 100644 -index 0d451103840b..000000000000 ---- a/third_party/rust/simd/src/arm/mod.rs -+++ /dev/null -@@ -1,4 +0,0 @@ --//! Features specific to ARM CPUs. -- --#[cfg(any(feature = "doc", target_feature = "neon"))] --pub mod neon; -diff --git a/third_party/rust/simd/src/arm/neon.rs b/third_party/rust/simd/src/arm/neon.rs -deleted file mode 100644 -index 8c90a72bb0dc..000000000000 ---- a/third_party/rust/simd/src/arm/neon.rs -+++ /dev/null -@@ -1,622 +0,0 @@ --use super::super::*; --use sixty_four::{i64x2, u64x2}; -- --#[repr(simd)] --#[derive(Debug, Copy, Clone)] --pub struct u32x2(u32, u32); --#[repr(simd)] --#[derive(Debug, Copy, Clone)] --pub struct i32x2(i32, i32); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool32ix2(i32, i32); -- --#[repr(simd)] --#[derive(Debug, Copy, Clone)] --pub struct f32x2(f32, f32); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool32fx2(i32, i32); -- --#[repr(simd)] --#[derive(Debug, Copy, Clone)] --pub struct u16x4(u16, u16, u16, u16); --#[repr(simd)] --#[derive(Debug, Copy, Clone)] --pub struct i16x4(i16, i16, i16, i16); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool16ix4(i16, i16, i16, i16); -- --#[repr(simd)] --#[derive(Debug, Copy, Clone)] --pub struct u8x8(u8, u8, u8, u8, -- u8, u8, u8, u8); --#[repr(simd)] --#[derive(Debug, Copy, Clone)] --pub 
struct i8x8(i8, i8, i8, i8, -- i8, i8, i8, i8); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool8ix8(i8, i8, i8, i8, -- i8, i8, i8, i8); -- --#[repr(simd)] --#[derive(Debug, Copy, Clone)] --pub struct i64x1(i64); --#[repr(simd)] --#[derive(Debug, Copy, Clone)] --pub struct u64x1(u64); -- --macro_rules! half_bools { -- ($($ty: ty, $as_u: ty, $elem: ty, $all: ident ($min: ident), $any: ident ($max: ident);)*) => { -- $( -- impl $ty { -- #[inline] -- pub fn $all(self) -> bool { -- unsafe { -- let t: $as_u = bitcast(self); -- let y = $min(t, mem::uninitialized()); -- let y32: u32x2 = bitcast(y); -- y32.0 == 0xFFFFFFFF -- } -- } -- #[inline] -- pub fn $any(self) -> bool { -- unsafe { -- let t: $as_u = bitcast(self); -- let y = $max(t, mem::uninitialized()); -- let y32: u32x2 = bitcast(y); -- y32.0 != 0 -- } -- } -- } -- -- impl Clone for $ty { -- #[inline] fn clone(&self) -> Self { -- *self -- } -- } -- -- unsafe impl Simd for $ty { -- type Bool = $ty; -- type Elem = $elem; -- } -- -- )* -- } --} -- --half_bools! { -- bool32fx2, u32x2, i32, bool32fx2_all(arm_vpmin_u32), bool32fx2_any(arm_vpmax_u32); -- bool8ix8, u8x8, i8, bool8ix8_all(arm_vpmin_u8), bool8ix8_any(arm_vpmax_u8); -- bool16ix4, u16x4, i16, bool16ix4_all(arm_vpmin_u16), bool16ix4_any(arm_vpmax_u16); -- bool32ix2, u32x2, f32, bool32ix2_all(arm_vpmin_u32), bool32ix2_any(arm_vpmax_u32); --} -- --macro_rules! half_simd { -- ($($ty: ty, $elem: ty, $bool_ty: ty;)*) => { -- $( -- unsafe impl Simd for $ty { -- type Bool = $bool_ty; -- type Elem = $elem; -- } -- )* -- } --} -- --half_simd! 
{ -- f32x2, f32, bool32fx2; -- u32x2, u32, bool32ix2; -- i32x2, i32, bool32ix2; -- u16x4, u16, bool16ix4; -- i16x4, i16, bool16ix4; -- u8x8, u8, bool8ix8; -- i8x8, i8, bool8ix8; --} -- --#[allow(dead_code)] --extern "platform-intrinsic" { -- fn arm_vhadd_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vhadd_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vhadd_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vhadd_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vhadd_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vhadd_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vhaddq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vhaddq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn arm_vhaddq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vhaddq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn arm_vhaddq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vhaddq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn arm_vrhadd_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vrhadd_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vrhadd_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vrhadd_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vrhadd_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vrhadd_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vrhaddq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vrhaddq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn arm_vrhaddq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vrhaddq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn arm_vrhaddq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vrhaddq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn arm_vqadd_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vqadd_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vqadd_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vqadd_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vqadd_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vqadd_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vqadd_s64(x: i64x1, y: i64x1) -> i64x1; -- fn arm_vqadd_u64(x: u64x1, y: u64x1) -> u64x1; -- fn arm_vqaddq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vqaddq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn arm_vqaddq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn 
arm_vqaddq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn arm_vqaddq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vqaddq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn arm_vqaddq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn arm_vqaddq_u64(x: u64x2, y: u64x2) -> u64x2; -- fn arm_vraddhn_s16(x: i16x8, y: i16x8) -> i8x8; -- fn arm_vraddhn_u16(x: u16x8, y: u16x8) -> u8x8; -- fn arm_vraddhn_s32(x: i32x4, y: i32x4) -> i16x4; -- fn arm_vraddhn_u32(x: u32x4, y: u32x4) -> u16x4; -- fn arm_vraddhn_s64(x: i64x2, y: i64x2) -> i32x2; -- fn arm_vraddhn_u64(x: u64x2, y: u64x2) -> u32x2; -- fn arm_vfma_f32(x: f32x2, y: f32x2) -> f32x2; -- fn arm_vfmaq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn arm_vqdmulh_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vqdmulh_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vqdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vqdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vqrdmulh_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vqrdmulh_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vqrdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vqrdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vmull_s8(x: i8x8, y: i8x8) -> i16x8; -- fn arm_vmull_u8(x: u8x8, y: u8x8) -> u16x8; -- fn arm_vmull_s16(x: i16x4, y: i16x4) -> i32x4; -- fn arm_vmull_u16(x: u16x4, y: u16x4) -> u32x4; -- fn arm_vmull_s32(x: i32x2, y: i32x2) -> i64x2; -- fn arm_vmull_u32(x: u32x2, y: u32x2) -> u64x2; -- fn arm_vqdmullq_s8(x: i8x8, y: i8x8) -> i16x8; -- fn arm_vqdmullq_s16(x: i16x4, y: i16x4) -> i32x4; -- fn arm_vhsub_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vhsub_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vhsub_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vhsub_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vhsub_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vhsub_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vhsubq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vhsubq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn arm_vhsubq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vhsubq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn arm_vhsubq_s32(x: 
i32x4, y: i32x4) -> i32x4; -- fn arm_vhsubq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn arm_vqsub_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vqsub_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vqsub_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vqsub_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vqsub_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vqsub_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vqsub_s64(x: i64x1, y: i64x1) -> i64x1; -- fn arm_vqsub_u64(x: u64x1, y: u64x1) -> u64x1; -- fn arm_vqsubq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vqsubq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn arm_vqsubq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vqsubq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn arm_vqsubq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vqsubq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn arm_vqsubq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn arm_vqsubq_u64(x: u64x2, y: u64x2) -> u64x2; -- fn arm_vrsubhn_s16(x: i16x8, y: i16x8) -> i8x8; -- fn arm_vrsubhn_u16(x: u16x8, y: u16x8) -> u8x8; -- fn arm_vrsubhn_s32(x: i32x4, y: i32x4) -> i16x4; -- fn arm_vrsubhn_u32(x: u32x4, y: u32x4) -> u16x4; -- fn arm_vrsubhn_s64(x: i64x2, y: i64x2) -> i32x2; -- fn arm_vrsubhn_u64(x: u64x2, y: u64x2) -> u32x2; -- fn arm_vabd_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vabd_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vabd_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vabd_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vabd_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vabd_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vabd_f32(x: f32x2, y: f32x2) -> f32x2; -- fn arm_vabdq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vabdq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn arm_vabdq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vabdq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn arm_vabdq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vabdq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn arm_vabdq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn arm_vmax_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vmax_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vmax_s16(x: i16x4, y: i16x4) 
-> i16x4; -- fn arm_vmax_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vmax_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vmax_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vmax_f32(x: f32x2, y: f32x2) -> f32x2; -- fn arm_vmaxq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vmaxq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn arm_vmaxq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vmaxq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn arm_vmaxq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vmaxq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn arm_vmaxq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn arm_vmin_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vmin_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vmin_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vmin_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vmin_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vmin_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vmin_f32(x: f32x2, y: f32x2) -> f32x2; -- fn arm_vminq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vminq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn arm_vminq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vminq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn arm_vminq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vminq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn arm_vminq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn arm_vshl_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vshl_u8(x: u8x8, y: i8x8) -> u8x8; -- fn arm_vshl_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vshl_u16(x: u16x4, y: i16x4) -> u16x4; -- fn arm_vshl_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vshl_u32(x: u32x2, y: i32x2) -> u32x2; -- fn arm_vshl_s64(x: i64x1, y: i64x1) -> i64x1; -- fn arm_vshl_u64(x: u64x1, y: i64x1) -> u64x1; -- fn arm_vshlq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vshlq_u8(x: u8x16, y: i8x16) -> u8x16; -- fn arm_vshlq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vshlq_u16(x: u16x8, y: i16x8) -> u16x8; -- fn arm_vshlq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vshlq_u32(x: u32x4, y: i32x4) -> u32x4; -- fn arm_vshlq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn arm_vshlq_u64(x: u64x2, 
y: i64x2) -> u64x2; -- fn arm_vqshl_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vqshl_u8(x: u8x8, y: i8x8) -> u8x8; -- fn arm_vqshl_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vqshl_u16(x: u16x4, y: i16x4) -> u16x4; -- fn arm_vqshl_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vqshl_u32(x: u32x2, y: i32x2) -> u32x2; -- fn arm_vqshl_s64(x: i64x1, y: i64x1) -> i64x1; -- fn arm_vqshl_u64(x: u64x1, y: i64x1) -> u64x1; -- fn arm_vqshlq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vqshlq_u8(x: u8x16, y: i8x16) -> u8x16; -- fn arm_vqshlq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vqshlq_u16(x: u16x8, y: i16x8) -> u16x8; -- fn arm_vqshlq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vqshlq_u32(x: u32x4, y: i32x4) -> u32x4; -- fn arm_vqshlq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn arm_vqshlq_u64(x: u64x2, y: i64x2) -> u64x2; -- fn arm_vrshl_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vrshl_u8(x: u8x8, y: i8x8) -> u8x8; -- fn arm_vrshl_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vrshl_u16(x: u16x4, y: i16x4) -> u16x4; -- fn arm_vrshl_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vrshl_u32(x: u32x2, y: i32x2) -> u32x2; -- fn arm_vrshl_s64(x: i64x1, y: i64x1) -> i64x1; -- fn arm_vrshl_u64(x: u64x1, y: i64x1) -> u64x1; -- fn arm_vrshlq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vrshlq_u8(x: u8x16, y: i8x16) -> u8x16; -- fn arm_vrshlq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vrshlq_u16(x: u16x8, y: i16x8) -> u16x8; -- fn arm_vrshlq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vrshlq_u32(x: u32x4, y: i32x4) -> u32x4; -- fn arm_vrshlq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn arm_vrshlq_u64(x: u64x2, y: i64x2) -> u64x2; -- fn arm_vqrshl_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vqrshl_u8(x: u8x8, y: i8x8) -> u8x8; -- fn arm_vqrshl_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vqrshl_u16(x: u16x4, y: i16x4) -> u16x4; -- fn arm_vqrshl_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vqrshl_u32(x: u32x2, y: i32x2) -> u32x2; -- fn arm_vqrshl_s64(x: i64x1, y: i64x1) -> i64x1; -- fn arm_vqrshl_u64(x: u64x1, y: 
i64x1) -> u64x1; -- fn arm_vqrshlq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vqrshlq_u8(x: u8x16, y: i8x16) -> u8x16; -- fn arm_vqrshlq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vqrshlq_u16(x: u16x8, y: i16x8) -> u16x8; -- fn arm_vqrshlq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vqrshlq_u32(x: u32x4, y: i32x4) -> u32x4; -- fn arm_vqrshlq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn arm_vqrshlq_u64(x: u64x2, y: i64x2) -> u64x2; -- fn arm_vqshrun_n_s16(x: i16x8, y: u32) -> i8x8; -- fn arm_vqshrun_n_s32(x: i32x4, y: u32) -> i16x4; -- fn arm_vqshrun_n_s64(x: i64x2, y: u32) -> i32x2; -- fn arm_vqrshrun_n_s16(x: i16x8, y: u32) -> i8x8; -- fn arm_vqrshrun_n_s32(x: i32x4, y: u32) -> i16x4; -- fn arm_vqrshrun_n_s64(x: i64x2, y: u32) -> i32x2; -- fn arm_vqshrn_n_s16(x: i16x8, y: u32) -> i8x8; -- fn arm_vqshrn_n_u16(x: u16x8, y: u32) -> u8x8; -- fn arm_vqshrn_n_s32(x: i32x4, y: u32) -> i16x4; -- fn arm_vqshrn_n_u32(x: u32x4, y: u32) -> u16x4; -- fn arm_vqshrn_n_s64(x: i64x2, y: u32) -> i32x2; -- fn arm_vqshrn_n_u64(x: u64x2, y: u32) -> u32x2; -- fn arm_vrshrn_n_s16(x: i16x8, y: u32) -> i8x8; -- fn arm_vrshrn_n_u16(x: u16x8, y: u32) -> u8x8; -- fn arm_vrshrn_n_s32(x: i32x4, y: u32) -> i16x4; -- fn arm_vrshrn_n_u32(x: u32x4, y: u32) -> u16x4; -- fn arm_vrshrn_n_s64(x: i64x2, y: u32) -> i32x2; -- fn arm_vrshrn_n_u64(x: u64x2, y: u32) -> u32x2; -- fn arm_vqrshrn_n_s16(x: i16x8, y: u32) -> i8x8; -- fn arm_vqrshrn_n_u16(x: u16x8, y: u32) -> u8x8; -- fn arm_vqrshrn_n_s32(x: i32x4, y: u32) -> i16x4; -- fn arm_vqrshrn_n_u32(x: u32x4, y: u32) -> u16x4; -- fn arm_vqrshrn_n_s64(x: i64x2, y: u32) -> i32x2; -- fn arm_vqrshrn_n_u64(x: u64x2, y: u32) -> u32x2; -- fn arm_vsri_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vsri_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vsri_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vsri_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vsri_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vsri_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vsri_s64(x: i64x1, y: i64x1) -> i64x1; -- 
fn arm_vsri_u64(x: u64x1, y: u64x1) -> u64x1; -- fn arm_vsriq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vsriq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn arm_vsriq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vsriq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn arm_vsriq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vsriq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn arm_vsriq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn arm_vsriq_u64(x: u64x2, y: u64x2) -> u64x2; -- fn arm_vsli_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vsli_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vsli_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vsli_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vsli_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vsli_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vsli_s64(x: i64x1, y: i64x1) -> i64x1; -- fn arm_vsli_u64(x: u64x1, y: u64x1) -> u64x1; -- fn arm_vsliq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vsliq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn arm_vsliq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vsliq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn arm_vsliq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vsliq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn arm_vsliq_s64(x: i64x2, y: i64x2) -> i64x2; -- fn arm_vsliq_u64(x: u64x2, y: u64x2) -> u64x2; -- fn arm_vvqmovn_s16(x: i16x8) -> i8x8; -- fn arm_vvqmovn_u16(x: u16x8) -> u8x8; -- fn arm_vvqmovn_s32(x: i32x4) -> i16x4; -- fn arm_vvqmovn_u32(x: u32x4) -> u16x4; -- fn arm_vvqmovn_s64(x: i64x2) -> i32x2; -- fn arm_vvqmovn_u64(x: u64x2) -> u32x2; -- fn arm_vabs_s8(x: i8x8) -> i8x8; -- fn arm_vabs_s16(x: i16x4) -> i16x4; -- fn arm_vabs_s32(x: i32x2) -> i32x2; -- fn arm_vabsq_s8(x: i8x16) -> i8x16; -- fn arm_vabsq_s16(x: i16x8) -> i16x8; -- fn arm_vabsq_s32(x: i32x4) -> i32x4; -- fn arm_vabs_f32(x: f32x2) -> f32x2; -- fn arm_vabsq_f32(x: f32x4) -> f32x4; -- fn arm_vqabs_s8(x: i8x8) -> i8x8; -- fn arm_vqabs_s16(x: i16x4) -> i16x4; -- fn arm_vqabs_s32(x: i32x2) -> i32x2; -- fn arm_vqabsq_s8(x: i8x16) -> i8x16; -- fn arm_vqabsq_s16(x: i16x8) -> i16x8; -- fn 
arm_vqabsq_s32(x: i32x4) -> i32x4; -- fn arm_vqneg_s8(x: i8x8) -> i8x8; -- fn arm_vqneg_s16(x: i16x4) -> i16x4; -- fn arm_vqneg_s32(x: i32x2) -> i32x2; -- fn arm_vqnegq_s8(x: i8x16) -> i8x16; -- fn arm_vqnegq_s16(x: i16x8) -> i16x8; -- fn arm_vqnegq_s32(x: i32x4) -> i32x4; -- fn arm_vclz_s8(x: i8x8) -> i8x8; -- fn arm_vclz_u8(x: u8x8) -> u8x8; -- fn arm_vclz_s16(x: i16x4) -> i16x4; -- fn arm_vclz_u16(x: u16x4) -> u16x4; -- fn arm_vclz_s32(x: i32x2) -> i32x2; -- fn arm_vclz_u32(x: u32x2) -> u32x2; -- fn arm_vclzq_s8(x: i8x16) -> i8x16; -- fn arm_vclzq_u8(x: u8x16) -> u8x16; -- fn arm_vclzq_s16(x: i16x8) -> i16x8; -- fn arm_vclzq_u16(x: u16x8) -> u16x8; -- fn arm_vclzq_s32(x: i32x4) -> i32x4; -- fn arm_vclzq_u32(x: u32x4) -> u32x4; -- fn arm_vcls_s8(x: i8x8) -> i8x8; -- fn arm_vcls_u8(x: u8x8) -> u8x8; -- fn arm_vcls_s16(x: i16x4) -> i16x4; -- fn arm_vcls_u16(x: u16x4) -> u16x4; -- fn arm_vcls_s32(x: i32x2) -> i32x2; -- fn arm_vcls_u32(x: u32x2) -> u32x2; -- fn arm_vclsq_s8(x: i8x16) -> i8x16; -- fn arm_vclsq_u8(x: u8x16) -> u8x16; -- fn arm_vclsq_s16(x: i16x8) -> i16x8; -- fn arm_vclsq_u16(x: u16x8) -> u16x8; -- fn arm_vclsq_s32(x: i32x4) -> i32x4; -- fn arm_vclsq_u32(x: u32x4) -> u32x4; -- fn arm_vcnt_s8(x: i8x8) -> i8x8; -- fn arm_vcnt_u8(x: u8x8) -> u8x8; -- fn arm_vcntq_s8(x: i8x16) -> i8x16; -- fn arm_vcntq_u8(x: u8x16) -> u8x16; -- fn arm_vrecpe_u32(x: u32x2) -> u32x2; -- fn arm_vrecpe_f32(x: f32x2) -> f32x2; -- fn arm_vrecpeq_u32(x: u32x4) -> u32x4; -- fn arm_vrecpeq_f32(x: f32x4) -> f32x4; -- fn arm_vrecps_f32(x: f32x2, y: f32x2) -> f32x2; -- fn arm_vrecpsq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn arm_vsqrt_f32(x: f32x2) -> f32x2; -- fn arm_vsqrtq_f32(x: f32x4) -> f32x4; -- fn arm_vrsqrte_u32(x: u32x2) -> u32x2; -- fn arm_vrsqrte_f32(x: f32x2) -> f32x2; -- fn arm_vrsqrteq_u32(x: u32x4) -> u32x4; -- fn arm_vrsqrteq_f32(x: f32x4) -> f32x4; -- fn arm_vrsqrts_f32(x: f32x2, y: f32x2) -> f32x2; -- fn arm_vrsqrtsq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn 
arm_vbsl_s8(x: u8x8, y: i8x8) -> i8x8; -- fn arm_vbsl_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vbsl_s16(x: u16x4, y: i16x4) -> i16x4; -- fn arm_vbsl_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vbsl_s32(x: u32x2, y: i32x2) -> i32x2; -- fn arm_vbsl_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vbsl_s64(x: u64x1, y: i64x1) -> i64x1; -- fn arm_vbsl_u64(x: u64x1, y: u64x1) -> u64x1; -- fn arm_vbslq_s8(x: u8x16, y: i8x16) -> i8x16; -- fn arm_vbslq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn arm_vbslq_s16(x: u16x8, y: i16x8) -> i16x8; -- fn arm_vbslq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn arm_vbslq_s32(x: u32x4, y: i32x4) -> i32x4; -- fn arm_vbslq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn arm_vbslq_s64(x: u64x2, y: i64x2) -> i64x2; -- fn arm_vbslq_u64(x: u64x2, y: u64x2) -> u64x2; -- fn arm_vpadd_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vpadd_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vpadd_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vpadd_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vpadd_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vpadd_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vpadd_f32(x: f32x2, y: f32x2) -> f32x2; -- fn arm_vpaddl_s16(x: i8x8) -> i16x4; -- fn arm_vpaddl_u16(x: u8x8) -> u16x4; -- fn arm_vpaddl_s32(x: i16x4) -> i32x2; -- fn arm_vpaddl_u32(x: u16x4) -> u32x2; -- fn arm_vpaddl_s64(x: i32x2) -> i64x1; -- fn arm_vpaddl_u64(x: u32x2) -> u64x1; -- fn arm_vpaddlq_s16(x: i8x16) -> i16x8; -- fn arm_vpaddlq_u16(x: u8x16) -> u16x8; -- fn arm_vpaddlq_s32(x: i16x8) -> i32x4; -- fn arm_vpaddlq_u32(x: u16x8) -> u32x4; -- fn arm_vpaddlq_s64(x: i32x4) -> i64x2; -- fn arm_vpaddlq_u64(x: u32x4) -> u64x2; -- fn arm_vpadal_s16(x: i16x4, y: i8x8) -> i16x4; -- fn arm_vpadal_u16(x: u16x4, y: u8x8) -> u16x4; -- fn arm_vpadal_s32(x: i32x2, y: i16x4) -> i32x2; -- fn arm_vpadal_u32(x: u32x2, y: u16x4) -> u32x2; -- fn arm_vpadal_s64(x: i64x1, y: i32x2) -> i64x1; -- fn arm_vpadal_u64(x: u64x1, y: u32x2) -> u64x1; -- fn arm_vpadalq_s16(x: i16x8, y: i8x16) -> i16x8; -- fn arm_vpadalq_u16(x: 
u16x8, y: u8x16) -> u16x8; -- fn arm_vpadalq_s32(x: i32x4, y: i16x8) -> i32x4; -- fn arm_vpadalq_u32(x: u32x4, y: u16x8) -> u32x4; -- fn arm_vpadalq_s64(x: i64x2, y: i32x4) -> i64x2; -- fn arm_vpadalq_u64(x: u64x2, y: u32x4) -> u64x2; -- fn arm_vpmax_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vpmax_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vpmax_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vpmax_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vpmax_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vpmax_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vpmax_f32(x: f32x2, y: f32x2) -> f32x2; -- fn arm_vpmin_s8(x: i8x8, y: i8x8) -> i8x8; -- fn arm_vpmin_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vpmin_s16(x: i16x4, y: i16x4) -> i16x4; -- fn arm_vpmin_u16(x: u16x4, y: u16x4) -> u16x4; -- fn arm_vpmin_s32(x: i32x2, y: i32x2) -> i32x2; -- fn arm_vpmin_u32(x: u32x2, y: u32x2) -> u32x2; -- fn arm_vpmin_f32(x: f32x2, y: f32x2) -> f32x2; -- fn arm_vpminq_s8(x: i8x16, y: i8x16) -> i8x16; -- fn arm_vpminq_u8(x: u8x16, y: u8x16) -> u8x16; -- fn arm_vpminq_s16(x: i16x8, y: i16x8) -> i16x8; -- fn arm_vpminq_u16(x: u16x8, y: u16x8) -> u16x8; -- fn arm_vpminq_s32(x: i32x4, y: i32x4) -> i32x4; -- fn arm_vpminq_u32(x: u32x4, y: u32x4) -> u32x4; -- fn arm_vpminq_f32(x: f32x4, y: f32x4) -> f32x4; -- fn arm_vtbl1_s8(x: i8x8, y: u8x8) -> i8x8; -- fn arm_vtbl1_u8(x: u8x8, y: u8x8) -> u8x8; -- fn arm_vtbx1_s8(x: i8x8, y: i8x8, z: u8x8) -> i8x8; -- fn arm_vtbx1_u8(x: u8x8, y: u8x8, z: u8x8) -> u8x8; -- fn arm_vtbl2_s8(x: (i8x8, i8x8), y: u8x8) -> i8x8; -- fn arm_vtbl2_u8(x: (u8x8, u8x8), y: u8x8) -> u8x8; -- fn arm_vtbx2_s8(x: (i8x8, i8x8), y: u8x8) -> i8x8; -- fn arm_vtbx2_u8(x: (u8x8, u8x8), y: u8x8) -> u8x8; -- fn arm_vtbl3_s8(x: (i8x8, i8x8, i8x8), y: u8x8) -> i8x8; -- fn arm_vtbl3_u8(x: (u8x8, u8x8, u8x8), y: u8x8) -> u8x8; -- fn arm_vtbx3_s8(x: i8x8, y: (i8x8, i8x8, i8x8), z: u8x8) -> i8x8; -- fn arm_vtbx3_u8(x: u8x8, y: (u8x8, u8x8, u8x8), z: u8x8) -> u8x8; -- fn arm_vtbl4_s8(x: (i8x8, i8x8, i8x8, i8x8), 
y: u8x8) -> i8x8; -- fn arm_vtbl4_u8(x: (u8x8, u8x8, u8x8, u8x8), y: u8x8) -> u8x8; -- fn arm_vtbx4_s8(x: i8x8, y: (i8x8, i8x8, i8x8, i8x8), z: u8x8) -> i8x8; -- fn arm_vtbx4_u8(x: u8x8, y: (u8x8, u8x8, u8x8, u8x8), z: u8x8) -> u8x8; --} -- -- --impl u8x8 { -- #[inline] -- pub fn table_lookup_1(self, t0: u8x8) -> u8x8 { -- unsafe {arm_vtbl1_u8(t0, self)} -- } -- #[inline] -- pub fn table_lookup_2(self, t0: u8x8, t1: u8x8) -> u8x8 { -- unsafe {arm_vtbl2_u8((t0, t1), self)} -- } -- #[inline] -- pub fn table_lookup_3(self, t0: u8x8, t1: u8x8, t2: u8x8) -> u8x8 { -- unsafe {arm_vtbl3_u8((t0, t1, t2), self)} -- } -- #[inline] -- pub fn table_lookup_4(self, t0: u8x8, t1: u8x8, t2: u8x8, t3: u8x8) -> u8x8 { -- unsafe {arm_vtbl4_u8((t0, t1, t2, t3), self)} -- } --} -- --#[doc(hidden)] --pub mod common { -- use super::super::super::*; -- use super::*; -- use core::mem; -- -- #[inline] -- pub fn f32x4_sqrt(x: f32x4) -> f32x4 { -- unsafe {super::arm_vsqrtq_f32(x)} -- } -- #[inline] -- pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 { -- unsafe {super::arm_vrsqrteq_f32(x)} -- } -- #[inline] -- pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 { -- unsafe {super::arm_vrecpeq_f32(x)} -- } -- #[inline] -- pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 { -- unsafe {super::arm_vmaxq_f32(x, y)} -- } -- #[inline] -- pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 { -- unsafe {super::arm_vminq_f32(x, y)} -- } -- -- macro_rules! 
bools { -- ($($ty: ty, $as_u: ty, $shuffle_fn: ident, $lo_idxs: expr, $hi_idxs: expr, $all: ident ($min: ident), $any: ident ($max: ident);)*) => { -- $( -- #[inline] -- pub fn $all(x: $ty) -> bool { -- unsafe { -- let t: $as_u = bitcast(x); -- let lo = $shuffle_fn(t, t, $lo_idxs); -- let hi = $shuffle_fn(t, t, $hi_idxs); -- let x = super::$min(lo, hi); -- let y = super::$min(x, mem::uninitialized()); -- let y32: u32x2 = bitcast(y); -- y32.0 == 0xFFFFFFFF -- } -- } -- #[inline] -- pub fn $any(x: $ty) -> bool { -- unsafe { -- let t: $as_u = bitcast(x); -- let lo = $shuffle_fn(t, t, $lo_idxs); -- let hi = $shuffle_fn(t, t, $hi_idxs); -- let x = super::$max(lo, hi); -- let y = super::$max(x, mem::uninitialized()); -- let y32: u32x2 = bitcast(y); -- y32.0 != 0 -- } -- } -- )* -- } -- } -- -- bools! { -- bool32fx4, u32x4, simd_shuffle2, [0, 1], [2, 3], bool32fx4_all(arm_vpmin_u32), bool32fx4_any(arm_vpmax_u32); -- bool8ix16, u8x16, simd_shuffle8, [0, 1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14, 15], bool8ix16_all(arm_vpmin_u8), bool8ix16_any(arm_vpmax_u8); -- bool16ix8, u16x8, simd_shuffle4, [0, 1, 2, 3], [4, 5, 6, 7], bool16ix8_all(arm_vpmin_u16), bool16ix8_any(arm_vpmax_u16); -- bool32ix4, u32x4, simd_shuffle2, [0, 1], [2, 3], bool32ix4_all(arm_vpmin_u32), bool32ix4_any(arm_vpmax_u32); -- } --} -diff --git a/third_party/rust/simd/src/common.rs b/third_party/rust/simd/src/common.rs -deleted file mode 100644 -index 1052ae36959d..000000000000 ---- a/third_party/rust/simd/src/common.rs -+++ /dev/null -@@ -1,520 +0,0 @@ --use super::*; --#[allow(unused_imports)] --use super::{ -- simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge, -- simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16, -- simd_insert, simd_extract, -- simd_cast, -- simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor, -- -- Unalign, bitcast, --}; --use core::{mem,ops}; -- --#[cfg(any(target_arch = "x86", -- target_arch = "x86_64"))] --use x86::sse2::common; 
--#[cfg(any(target_arch = "arm"))] --use arm::neon::common; --#[cfg(any(target_arch = "aarch64"))] --use aarch64::neon::common; -- --macro_rules! basic_impls { -- ($( -- $name: ident: -- $elem: ident, $bool: ident, $shuffle: ident, $length: expr, $($first: ident),* | $($last: ident),*; -- )*) => { -- $(impl $name { -- /// Create a new instance. -- #[inline] -- pub const fn new($($first: $elem),*, $($last: $elem),*) -> $name { -- $name($($first),*, $($last),*) -- } -- -- /// Create a new instance where every lane has value `x`. -- #[inline] -- pub const fn splat(x: $elem) -> $name { -- $name($({ #[allow(dead_code)] struct $first; x }),*, -- $({ #[allow(dead_code)] struct $last; x }),*) -- } -- -- /// Compare for equality. -- #[inline] -- pub fn eq(self, other: Self) -> $bool { -- unsafe {simd_eq(self, other)} -- } -- /// Compare for equality. -- #[inline] -- pub fn ne(self, other: Self) -> $bool { -- unsafe {simd_ne(self, other)} -- } -- /// Compare for equality. -- #[inline] -- pub fn lt(self, other: Self) -> $bool { -- unsafe {simd_lt(self, other)} -- } -- /// Compare for equality. -- #[inline] -- pub fn le(self, other: Self) -> $bool { -- unsafe {simd_le(self, other)} -- } -- /// Compare for equality. -- #[inline] -- pub fn gt(self, other: Self) -> $bool { -- unsafe {simd_gt(self, other)} -- } -- /// Compare for equality. -- #[inline] -- pub fn ge(self, other: Self) -> $bool { -- unsafe {simd_ge(self, other)} -- } -- -- /// Extract the value of the `idx`th lane of `self`. -- /// -- /// # Panics -- /// -- /// `extract` will panic if `idx` is out of bounds. -- #[inline] -- pub fn extract(self, idx: u32) -> $elem { -- assert!(idx < $length); -- unsafe {simd_extract(self, idx)} -- } -- /// Return a new vector where the `idx`th lane is replaced -- /// by `elem`. -- /// -- /// # Panics -- /// -- /// `replace` will panic if `idx` is out of bounds. 
-- #[inline] -- pub fn replace(self, idx: u32, elem: $elem) -> Self { -- assert!(idx < $length); -- unsafe {simd_insert(self, idx, elem)} -- } -- -- /// Load a new value from the `idx`th position of `array`. -- /// -- /// This is equivalent to the following, but is possibly -- /// more efficient: -- /// -- /// ```rust,ignore -- /// Self::new(array[idx], array[idx + 1], ...) -- /// ``` -- /// -- /// # Panics -- /// -- /// `load` will panic if `idx` is out of bounds in -- /// `array`, or if `array[idx..]` is too short. -- #[inline] -- pub fn load(array: &[$elem], idx: usize) -> Self { -- let data = &array[idx..idx + $length]; -- let loaded = unsafe { -- *(data.as_ptr() as *const Unalign) -- }; -- loaded.0 -- } -- -- /// Store the elements of `self` to `array`, starting at -- /// the `idx`th position. -- /// -- /// This is equivalent to the following, but is possibly -- /// more efficient: -- /// -- /// ```rust,ignore -- /// array[i] = self.extract(0); -- /// array[i + 1] = self.extract(1); -- /// // ... -- /// ``` -- /// -- /// # Panics -- /// -- /// `store` will panic if `idx` is out of bounds in -- /// `array`, or if `array[idx...]` is too short. -- #[inline] -- pub fn store(self, array: &mut [$elem], idx: usize) { -- let place = &mut array[idx..idx + $length]; -- unsafe { -- *(place.as_mut_ptr() as *mut Unalign) = Unalign(self) -- } -- } -- })* -- } --} -- --basic_impls! { -- u32x4: u32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3; -- i32x4: i32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3; -- f32x4: f32, bool32fx4, simd_shuffle4, 4, x0, x1 | x2, x3; -- -- u16x8: u16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; -- i16x8: i16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; -- -- u8x16: u8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; -- i8x16: i8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; --} -- --macro_rules! 
bool_impls { -- ($( -- $name: ident: -- $elem: ident, $repr: ident, $repr_elem: ident, $length: expr, $all: ident, $any: ident, -- $($first: ident),* | $($last: ident),* -- [$(#[$cvt_meta: meta] $cvt: ident -> $cvt_to: ident),*]; -- )*) => { -- $(impl $name { -- /// Convert to integer representation. -- #[inline] -- pub fn to_repr(self) -> $repr { -- unsafe {mem::transmute(self)} -- } -- /// Convert from integer representation. -- #[inline] -- #[inline] -- pub fn from_repr(x: $repr) -> Self { -- unsafe {mem::transmute(x)} -- } -- -- /// Create a new instance. -- #[inline] -- pub fn new($($first: bool),*, $($last: bool),*) -> $name { -- unsafe { -- // negate everything together -- simd_sub($name::splat(false), -- $name($( ($first as $repr_elem) ),*, -- $( ($last as $repr_elem) ),*)) -- } -- } -- -- /// Create a new instance where every lane has value `x`. -- #[allow(unused_variables)] -- #[inline] -- pub fn splat(x: bool) -> $name { -- let x = if x {!(0 as $repr_elem)} else {0}; -- $name($({ let $first = (); x}),*, -- $({ let $last = (); x}),*) -- } -- -- /// Extract the value of the `idx`th lane of `self`. -- /// -- /// # Panics -- /// -- /// `extract` will panic if `idx` is out of bounds. -- #[inline] -- pub fn extract(self, idx: u32) -> bool { -- assert!(idx < $length); -- unsafe {simd_extract(self.to_repr(), idx) != 0} -- } -- /// Return a new vector where the `idx`th lane is replaced -- /// by `elem`. -- /// -- /// # Panics -- /// -- /// `replace` will panic if `idx` is out of bounds. -- #[inline] -- pub fn replace(self, idx: u32, elem: bool) -> Self { -- assert!(idx < $length); -- let x = if elem {!(0 as $repr_elem)} else {0}; -- unsafe {Self::from_repr(simd_insert(self.to_repr(), idx, x))} -- } -- /// Select between elements of `then` and `else_`, based on -- /// the corresponding element of `self`. 
-- /// -- /// This is equivalent to the following, but is possibly -- /// more efficient: -- /// -- /// ```rust,ignore -- /// T::new(if self.extract(0) { then.extract(0) } else { else_.extract(0) }, -- /// if self.extract(1) { then.extract(1) } else { else_.extract(1) }, -- /// ...) -- /// ``` -- #[inline] -- pub fn select>(self, then: T, else_: T) -> T { -- let then: $repr = bitcast(then); -- let else_: $repr = bitcast(else_); -- bitcast((then & self.to_repr()) | (else_ & (!self).to_repr())) -- } -- -- /// Check if every element of `self` is true. -- /// -- /// This is equivalent to the following, but is possibly -- /// more efficient: -- /// -- /// ```rust,ignore -- /// self.extract(0) && self.extract(1) && ... -- /// ``` -- #[inline] -- pub fn all(self) -> bool { -- common::$all(self) -- } -- /// Check if any element of `self` is true. -- /// -- /// This is equivalent to the following, but is possibly -- /// more efficient: -- /// -- /// ```rust,ignore -- /// self.extract(0) || self.extract(1) || ... -- /// ``` -- #[inline] -- pub fn any(self) -> bool { -- common::$any(self) -- } -- -- $( -- #[$cvt_meta] -- #[inline] -- pub fn $cvt(self) -> $cvt_to { -- bitcast(self) -- } -- )* -- } -- impl ops::Not for $name { -- type Output = Self; -- -- #[inline] -- fn not(self) -> Self { -- Self::from_repr($repr::splat(!(0 as $repr_elem)) ^ self.to_repr()) -- } -- } -- )* -- } --} -- --bool_impls! { -- bool32ix4: bool32i, i32x4, i32, 4, bool32ix4_all, bool32ix4_any, x0, x1 | x2, x3 -- [/// Convert `self` to a boolean vector for interacting with floating point vectors. -- to_f -> bool32fx4]; -- bool32fx4: bool32f, i32x4, i32, 4, bool32fx4_all, bool32fx4_any, x0, x1 | x2, x3 -- [/// Convert `self` to a boolean vector for interacting with integer vectors. 
-- to_i -> bool32ix4]; -- -- bool16ix8: bool16i, i16x8, i16, 8, bool16ix8_all, bool16ix8_any, x0, x1, x2, x3 | x4, x5, x6, x7 []; -- -- bool8ix16: bool8i, i8x16, i8, 16, bool8ix16_all, bool8ix16_any, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15 []; --} -- --impl u32x4 { -- /// Convert each lane to a signed integer. -- #[inline] -- pub fn to_i32(self) -> i32x4 { -- unsafe {simd_cast(self)} -- } -- /// Convert each lane to a 32-bit float. -- #[inline] -- pub fn to_f32(self) -> f32x4 { -- unsafe {simd_cast(self)} -- } --} --impl i32x4 { -- /// Convert each lane to an unsigned integer. -- #[inline] -- pub fn to_u32(self) -> u32x4 { -- unsafe {simd_cast(self)} -- } -- /// Convert each lane to a 32-bit float. -- #[inline] -- pub fn to_f32(self) -> f32x4 { -- unsafe {simd_cast(self)} -- } --} --impl f32x4 { -- /// Compute the square root of each lane. -- #[inline] -- pub fn sqrt(self) -> Self { -- common::f32x4_sqrt(self) -- } -- /// Compute an approximation to the reciprocal of the square root -- /// of `self`, that is, `f32::splat(1.0) / self.sqrt()`. -- /// -- /// The accuracy of this approximation is platform dependent. -- #[inline] -- pub fn approx_rsqrt(self) -> Self { -- common::f32x4_approx_rsqrt(self) -- } -- /// Compute an approximation to the reciprocal of `self`, that is, -- /// `f32::splat(1.0) / self`. -- /// -- /// The accuracy of this approximation is platform dependent. -- #[inline] -- pub fn approx_reciprocal(self) -> Self { -- common::f32x4_approx_reciprocal(self) -- } -- /// Compute the lane-wise maximum of `self` and `other`. -- /// -- /// This is equivalent to the following, but is possibly more -- /// efficient: -- /// -- /// ```rust,ignore -- /// f32x4::new(self.extract(0).max(other.extract(0)), -- /// self.extract(1).max(other.extract(1)), -- /// ...) -- /// ``` -- #[inline] -- pub fn max(self, other: Self) -> Self { -- common::f32x4_max(self, other) -- } -- /// Compute the lane-wise minimum of `self` and `other`. 
-- /// -- /// This is equivalent to the following, but is possibly more -- /// efficient: -- /// -- /// ```rust,ignore -- /// f32x4::new(self.extract(0).min(other.extract(0)), -- /// self.extract(1).min(other.extract(1)), -- /// ...) -- /// ``` -- #[inline] -- pub fn min(self, other: Self) -> Self { -- common::f32x4_min(self, other) -- } -- /// Convert each lane to a signed integer. -- #[inline] -- pub fn to_i32(self) -> i32x4 { -- unsafe {simd_cast(self)} -- } -- /// Convert each lane to an unsigned integer. -- #[inline] -- pub fn to_u32(self) -> u32x4 { -- unsafe {simd_cast(self)} -- } --} -- --impl i16x8 { -- /// Convert each lane to an unsigned integer. -- #[inline] -- pub fn to_u16(self) -> u16x8 { -- unsafe {simd_cast(self)} -- } --} --impl u16x8 { -- /// Convert each lane to a signed integer. -- #[inline] -- pub fn to_i16(self) -> i16x8 { -- unsafe {simd_cast(self)} -- } --} -- --impl i8x16 { -- /// Convert each lane to an unsigned integer. -- #[inline] -- pub fn to_u8(self) -> u8x16 { -- unsafe {simd_cast(self)} -- } --} --impl u8x16 { -- /// Convert each lane to a signed integer. -- #[inline] -- pub fn to_i8(self) -> i8x16 { -- unsafe {simd_cast(self)} -- } --} -- -- --macro_rules! neg_impls { -- ($zero: expr, $($ty: ident,)*) => { -- $(impl ops::Neg for $ty { -- type Output = Self; -- fn neg(self) -> Self { -- $ty::splat($zero) - self -- } -- })* -- } --} --neg_impls!{ -- 0, -- i32x4, -- i16x8, -- i8x16, --} --neg_impls! { -- 0.0, -- f32x4, --} --macro_rules! not_impls { -- ($($ty: ident,)*) => { -- $(impl ops::Not for $ty { -- type Output = Self; -- fn not(self) -> Self { -- $ty::splat(!0) ^ self -- } -- })* -- } --} --not_impls! { -- i32x4, -- i16x8, -- i8x16, -- u32x4, -- u16x8, -- u8x16, --} -- --macro_rules! 
operators { -- ($($trayt: ident ($func: ident, $method: ident): $($ty: ty),*;)*) => { -- $( -- $(impl ops::$trayt for $ty { -- type Output = Self; -- #[inline] -- fn $method(self, x: Self) -> Self { -- unsafe {$func(self, x)} -- } -- })* -- )* -- } --} --operators! { -- Add (simd_add, add): -- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, -- f32x4; -- Sub (simd_sub, sub): -- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, -- f32x4; -- Mul (simd_mul, mul): -- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, -- f32x4; -- Div (simd_div, div): f32x4; -- -- BitAnd (simd_and, bitand): -- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, -- bool8ix16, bool16ix8, bool32ix4, -- bool32fx4; -- BitOr (simd_or, bitor): -- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, -- bool8ix16, bool16ix8, bool32ix4, -- bool32fx4; -- BitXor (simd_xor, bitxor): -- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, -- bool8ix16, bool16ix8, bool32ix4, -- bool32fx4; --} -- --macro_rules! shift_one { -- ($ty: ident, $($by: ident),*) => { -- $( -- impl ops::Shl<$by> for $ty { -- type Output = Self; -- #[inline] -- fn shl(self, other: $by) -> Self { -- unsafe { simd_shl(self, $ty::splat(other as <$ty as Simd>::Elem)) } -- } -- } -- impl ops::Shr<$by> for $ty { -- type Output = Self; -- #[inline] -- fn shr(self, other: $by) -> Self { -- unsafe {simd_shr(self, $ty::splat(other as <$ty as Simd>::Elem))} -- } -- } -- )* -- } --} -- --macro_rules! shift { -- ($($ty: ident),*) => { -- $(shift_one! { -- $ty, -- u8, u16, u32, u64, usize, -- i8, i16, i32, i64, isize -- })* -- } --} --shift! { -- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4 --} -diff --git a/third_party/rust/simd/src/lib.rs b/third_party/rust/simd/src/lib.rs -deleted file mode 100644 -index e8fb1b16f53b..000000000000 ---- a/third_party/rust/simd/src/lib.rs -+++ /dev/null -@@ -1,804 +0,0 @@ --//! `simd` offers a basic interface to the SIMD functionality of CPUs. 
--#![no_std] -- --#![feature(cfg_target_feature, repr_simd, platform_intrinsics, const_fn)] --#![allow(non_camel_case_types)] -- --#[cfg(feature = "with-serde")] --extern crate serde; --#[cfg(feature = "with-serde")] --#[macro_use] --extern crate serde_derive; -- --use core::mem; -- --/// Boolean type for 8-bit integers. --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] --pub struct bool8i(i8); --/// Boolean type for 16-bit integers. --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] --pub struct bool16i(i16); --/// Boolean type for 32-bit integers. --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] --pub struct bool32i(i32); --/// Boolean type for 32-bit floats. --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] --pub struct bool32f(i32); -- --macro_rules! bool { -- ($($name: ident, $inner: ty;)*) => { -- $( -- impl From for $name { -- #[inline] -- fn from(b: bool) -> $name { -- $name(-(b as $inner)) -- } -- } -- impl From<$name> for bool { -- #[inline] -- fn from(b: $name) -> bool { -- b.0 != 0 -- } -- } -- )* -- } --} --bool! { -- bool8i, i8; -- bool16i, i16; -- bool32i, i32; -- bool32f, i32; --} -- --/// Types that are SIMD vectors. --pub unsafe trait Simd { -- /// The corresponding boolean vector type. -- type Bool: Simd; -- /// The element that this vector stores. -- type Elem; --} -- --/// A SIMD vector of 4 `u32`s. --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct u32x4(u32, u32, u32, u32); --/// A SIMD vector of 4 `i32`s. 
--#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct i32x4(i32, i32, i32, i32); --/// A SIMD vector of 4 `f32`s. --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct f32x4(f32, f32, f32, f32); --/// A SIMD boolean vector for length-4 vectors of 32-bit integers. --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool32ix4(i32, i32, i32, i32); --/// A SIMD boolean vector for length-4 vectors of 32-bit floats. --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool32fx4(i32, i32, i32, i32); -- --#[allow(dead_code)] --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --struct u32x2(u32, u32); --#[allow(dead_code)] --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --struct i32x2(i32, i32); --#[allow(dead_code)] --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --struct f32x2(f32, f32); --#[allow(dead_code)] --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --struct bool32ix2(i32, i32); --#[allow(dead_code)] --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --struct bool32fx2(i32, i32); -- --/// A SIMD vector of 8 `u16`s. --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct u16x8(u16, u16, u16, u16, -- u16, u16, u16, u16); --/// A SIMD vector of 8 `i16`s. 
--#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct i16x8(i16, i16, i16, i16, -- i16, i16, i16, i16); --/// A SIMD boolean vector for length-8 vectors of 16-bit integers. --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool16ix8(i16, i16, i16, i16, -- i16, i16, i16, i16); -- --/// A SIMD vector of 16 `u8`s. --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct u8x16(u8, u8, u8, u8, u8, u8, u8, u8, -- u8, u8, u8, u8, u8, u8, u8, u8); --/// A SIMD vector of 16 `i8`s. --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct i8x16(i8, i8, i8, i8, i8, i8, i8, i8, -- i8, i8, i8, i8, i8, i8, i8, i8); --/// A SIMD boolean vector for length-16 vectors of 8-bit integers. --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool8ix16(i8, i8, i8, i8, i8, i8, i8, i8, -- i8, i8, i8, i8, i8, i8, i8, i8); -- -- --macro_rules! simd { -- ($($bool: ty: $($ty: ty = $elem: ty),*;)*) => { -- $($(unsafe impl Simd for $ty { -- type Bool = $bool; -- type Elem = $elem; -- } -- impl Clone for $ty { #[inline] fn clone(&self) -> Self { *self } } -- )*)*} --} --simd! 
{ -- bool8ix16: i8x16 = i8, u8x16 = u8, bool8ix16 = bool8i; -- bool16ix8: i16x8 = i16, u16x8 = u16, bool16ix8 = bool16i; -- bool32ix4: i32x4 = i32, u32x4 = u32, bool32ix4 = bool32i; -- bool32fx4: f32x4 = f32, bool32fx4 = bool32f; -- -- bool32ix2: i32x2 = i32, u32x2 = u32, bool32ix2 = bool32i; -- bool32fx2: f32x2 = f32, bool32fx2 = bool32f; --} -- --#[allow(dead_code)] --#[inline] --fn bitcast(x: T) -> U { -- assert_eq!(mem::size_of::(), -- mem::size_of::()); -- unsafe {mem::transmute_copy(&x)} --} -- --#[allow(dead_code)] --extern "platform-intrinsic" { -- fn simd_eq, U>(x: T, y: T) -> U; -- fn simd_ne, U>(x: T, y: T) -> U; -- fn simd_lt, U>(x: T, y: T) -> U; -- fn simd_le, U>(x: T, y: T) -> U; -- fn simd_gt, U>(x: T, y: T) -> U; -- fn simd_ge, U>(x: T, y: T) -> U; -- -- fn simd_shuffle2>(x: T, y: T, idx: [u32; 2]) -> U; -- fn simd_shuffle4>(x: T, y: T, idx: [u32; 4]) -> U; -- fn simd_shuffle8>(x: T, y: T, idx: [u32; 8]) -> U; -- fn simd_shuffle16>(x: T, y: T, idx: [u32; 16]) -> U; -- -- fn simd_insert, U>(x: T, idx: u32, val: U) -> T; -- fn simd_extract, U>(x: T, idx: u32) -> U; -- -- fn simd_cast(x: T) -> U; -- -- fn simd_add(x: T, y: T) -> T; -- fn simd_sub(x: T, y: T) -> T; -- fn simd_mul(x: T, y: T) -> T; -- fn simd_div(x: T, y: T) -> T; -- fn simd_shl(x: T, y: T) -> T; -- fn simd_shr(x: T, y: T) -> T; -- fn simd_and(x: T, y: T) -> T; -- fn simd_or(x: T, y: T) -> T; -- fn simd_xor(x: T, y: T) -> T; --} --#[repr(packed)] --#[derive(Copy)] --struct Unalign(T); -- --impl Clone for Unalign { -- fn clone(&self) -> Unalign { -- Unalign(unsafe { self.0.clone() }) -- } --} -- --#[macro_use] --mod common; --mod sixty_four; --mod v256; -- --#[cfg(any(feature = "doc", -- target_arch = "x86", -- target_arch = "x86_64"))] --pub mod x86; --#[cfg(any(feature = "doc", target_arch = "arm"))] --pub mod arm; --#[cfg(any(feature = "doc", target_arch = "aarch64"))] --pub mod aarch64; -- --#[cfg(test)] --mod tests { -- -- use super::u8x16; -- use super::u16x8; -- use super::u32x4; 
-- use super::f32x4; -- -- #[test] -- fn test_u8x16_none_not_any() { -- let x1 = u8x16::splat(1); -- let x2 = u8x16::splat(2); -- assert!(!(x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u8x16_none_not_all() { -- let x1 = u8x16::splat(1); -- let x2 = u8x16::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u8x16_all_any() { -- let x1 = u8x16::splat(1); -- let x2 = u8x16::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u8x16_all_all() { -- let x1 = u8x16::splat(1); -- let x2 = u8x16::splat(1); -- assert!((x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u8x16_except_last_any() { -- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); -- let x2 = u8x16::splat(2); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u8x16_except_last_not_all() { -- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); -- let x2 = u8x16::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u8x16_except_first_any() { -- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); -- let x2 = u8x16::splat(2); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u8x16_except_first_not_all() { -- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); -- let x2 = u8x16::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u8x16_only_last_any() { -- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); -- let x2 = u8x16::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u8x16_only_last_not_all() { -- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); -- let x2 = u8x16::splat(1); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u8x16_only_first_any() { -- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); -- let x2 = u8x16::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u8x16_only_first_not_all() { -- let x1 = u8x16::new(1, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); -- let x2 = u8x16::splat(1); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u8x16_except_thirteenth_any() { -- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); -- let x2 = u8x16::splat(2); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u8x16_except_thirteenth_not_all() { -- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); -- let x2 = u8x16::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u8x16_except_fifth_any() { -- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); -- let x2 = u8x16::splat(2); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u8x16_except_fifth_not_all() { -- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); -- let x2 = u8x16::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u8x16_only_thirteenth_any() { -- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); -- let x2 = u8x16::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u8x16_only_thirteenth_not_all() { -- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); -- let x2 = u8x16::splat(1); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u8x16_only_fifth_any() { -- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); -- let x2 = u8x16::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u8x16_only_fifth_not_all() { -- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); -- let x2 = u8x16::splat(1); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u16x8_none_not_any() { -- let x1 = u16x8::splat(1); -- let x2 = u16x8::splat(2); -- assert!(!(x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u16x8_none_not_all() { -- let x1 = u16x8::splat(1); -- let x2 = u16x8::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u16x8_all_any() { -- let x1 = 
u16x8::splat(1); -- let x2 = u16x8::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u16x8_all_all() { -- let x1 = u16x8::splat(1); -- let x2 = u16x8::splat(1); -- assert!((x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u16x8_except_last_any() { -- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); -- let x2 = u16x8::splat(2); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u16x8_except_last_not_all() { -- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); -- let x2 = u16x8::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u16x8_except_first_any() { -- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); -- let x2 = u16x8::splat(2); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u16x8_except_first_not_all() { -- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); -- let x2 = u16x8::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u16x8_only_last_any() { -- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); -- let x2 = u16x8::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u16x8_only_last_not_all() { -- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); -- let x2 = u16x8::splat(1); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u16x8_only_first_any() { -- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); -- let x2 = u16x8::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u16x8_only_first_not_all() { -- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); -- let x2 = u16x8::splat(1); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u16x8_except_sixth_any() { -- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); -- let x2 = u16x8::splat(2); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u16x8_except_sixth_not_all() { -- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); -- let x2 = u16x8::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u16x8_except_third_any() { -- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); -- let x2 = 
u16x8::splat(2); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u16x8_except_third_not_all() { -- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); -- let x2 = u16x8::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u16x8_only_sixth_any() { -- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); -- let x2 = u16x8::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u16x8_only_sixth_not_all() { -- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); -- let x2 = u16x8::splat(1); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u16x8_only_third_any() { -- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); -- let x2 = u16x8::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u16x8_only_third_not_all() { -- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); -- let x2 = u16x8::splat(1); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u32x4_none_not_any() { -- let x1 = u32x4::splat(1); -- let x2 = u32x4::splat(2); -- assert!(!(x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u32x4_none_not_all() { -- let x1 = u32x4::splat(1); -- let x2 = u32x4::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u32x4_all_any() { -- let x1 = u32x4::splat(1); -- let x2 = u32x4::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u32x4_all_all() { -- let x1 = u32x4::splat(1); -- let x2 = u32x4::splat(1); -- assert!((x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u32x4_except_last_any() { -- let x1 = u32x4::new(2, 2, 2, 1); -- let x2 = u32x4::splat(2); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u32x4_except_last_not_all() { -- let x1 = u32x4::new(2, 2, 2, 1); -- let x2 = u32x4::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u32x4_except_first_any() { -- let x1 = u32x4::new(1, 2, 2, 2); -- let x2 = u32x4::splat(2); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u32x4_except_first_not_all() { -- let x1 = u32x4::new(1, 2, 2, 2); 
-- let x2 = u32x4::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u32x4_only_last_any() { -- let x1 = u32x4::new(2, 2, 2, 1); -- let x2 = u32x4::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u32x4_only_last_not_all() { -- let x1 = u32x4::new(2, 2, 2, 1); -- let x2 = u32x4::splat(1); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u32x4_only_first_any() { -- let x1 = u32x4::new(1, 2, 2, 2); -- let x2 = u32x4::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u32x4_only_first_not_all() { -- let x1 = u32x4::new(1, 2, 2, 2); -- let x2 = u32x4::splat(1); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u32x4_except_second_any() { -- let x1 = u32x4::new(1, 2, 2, 2); -- let x2 = u32x4::splat(2); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u32x4_except_second_not_all() { -- let x1 = u32x4::new(1, 2, 2, 2); -- let x2 = u32x4::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u32x4_except_third_any() { -- let x1 = u32x4::new(2, 2, 1, 2); -- let x2 = u32x4::splat(2); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u32x4_except_third_not_all() { -- let x1 = u32x4::new(2, 2, 1, 2); -- let x2 = u32x4::splat(2); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u32x4_only_second_any() { -- let x1 = u32x4::new(1, 2, 2, 2); -- let x2 = u32x4::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u32x4_only_second_not_all() { -- let x1 = u32x4::new(1, 2, 2, 2); -- let x2 = u32x4::splat(1); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_u32x4_only_third_any() { -- let x1 = u32x4::new(2, 2, 1, 2); -- let x2 = u32x4::splat(1); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_u32x4_only_third_not_all() { -- let x1 = u32x4::new(2, 2, 1, 2); -- let x2 = u32x4::splat(1); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_f32x4_none_not_any() { -- let x1 = f32x4::splat(1.0); -- 
let x2 = f32x4::splat(2.0); -- assert!(!(x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_f32x4_none_not_all() { -- let x1 = f32x4::splat(1.0); -- let x2 = f32x4::splat(2.0); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_f32x4_all_any() { -- let x1 = f32x4::splat(1.0); -- let x2 = f32x4::splat(1.0); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_f32x4_all_all() { -- let x1 = f32x4::splat(1.0); -- let x2 = f32x4::splat(1.0); -- assert!((x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_f32x4_except_last_any() { -- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); -- let x2 = f32x4::splat(2.0); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_f32x4_except_last_not_all() { -- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); -- let x2 = f32x4::splat(2.0); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_f32x4_except_first_any() { -- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); -- let x2 = f32x4::splat(2.0); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_f32x4_except_first_not_all() { -- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); -- let x2 = f32x4::splat(2.0); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_f32x4_only_last_any() { -- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); -- let x2 = f32x4::splat(1.0); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_f32x4_only_last_not_all() { -- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); -- let x2 = f32x4::splat(1.0); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_f32x4_only_first_any() { -- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); -- let x2 = f32x4::splat(1.0); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_f32x4_only_first_not_all() { -- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); -- let x2 = f32x4::splat(1.0); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_f32x4_except_second_any() { -- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); -- let x2 = f32x4::splat(2.0); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn 
test_f32x4_except_second_not_all() { -- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); -- let x2 = f32x4::splat(2.0); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_f32x4_except_third_any() { -- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); -- let x2 = f32x4::splat(2.0); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_f32x4_except_third_not_all() { -- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); -- let x2 = f32x4::splat(2.0); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_f32x4_only_second_any() { -- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); -- let x2 = f32x4::splat(1.0); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_f32x4_only_second_not_all() { -- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); -- let x2 = f32x4::splat(1.0); -- assert!(!(x1.eq(x2)).all()); -- } -- -- #[test] -- fn test_f32x4_only_third_any() { -- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); -- let x2 = f32x4::splat(1.0); -- assert!((x1.eq(x2)).any()); -- } -- -- #[test] -- fn test_f32x4_only_third_not_all() { -- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); -- let x2 = f32x4::splat(1.0); -- assert!(!(x1.eq(x2)).all()); -- } -- --} -diff --git a/third_party/rust/simd/src/sixty_four.rs b/third_party/rust/simd/src/sixty_four.rs -deleted file mode 100644 -index a87f44a77ee7..000000000000 ---- a/third_party/rust/simd/src/sixty_four.rs -+++ /dev/null -@@ -1,228 +0,0 @@ --#![allow(dead_code)] --use super::*; --#[allow(unused_imports)] --use super::{ -- f32x2, -- simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge, -- simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16, -- simd_insert, simd_extract, -- simd_cast, -- simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor, -- -- Unalign, bitcast, --}; --use core::{mem,ops}; -- --/// Boolean type for 64-bit integers. --#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy, Clone)] --pub struct bool64i(i64); --/// Boolean type for 64-bit floats. 
--#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy, Clone)] --pub struct bool64f(i64); --/// A SIMD vector of 2 `u64`s. --#[repr(simd)] --#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct u64x2(u64, u64); --/// A SIMD vector of 2 `i64`s. --#[repr(simd)] --#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct i64x2(i64, i64); --/// A SIMD vector of 2 `f64`s. --#[repr(simd)] --#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct f64x2(f64, f64); --/// A SIMD boolean vector for length-2 vectors of 64-bit integers. --#[repr(simd)] --#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool64ix2(i64, i64); --/// A SIMD boolean vector for length-2 vectors of 64-bit floats. --#[repr(simd)] --#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool64fx2(i64, i64); -- --simd! { -- bool64ix2: i64x2 = i64, u64x2 = u64, bool64ix2 = bool64i; -- bool64fx2: f64x2 = f64, bool64fx2 = bool64f; --} --basic_impls! { -- u64x2: u64, bool64ix2, simd_shuffle2, 2, x0 | x1; -- i64x2: i64, bool64ix2, simd_shuffle2, 2, x0 | x1; -- f64x2: f64, bool64fx2, simd_shuffle2, 2, x0 | x1; --} -- --mod common { -- use super::*; -- // naive for now -- #[inline] -- pub fn bool64ix2_all(x: bool64ix2) -> bool { -- x.0 != 0 && x.1 != 0 -- } -- #[inline] -- pub fn bool64ix2_any(x: bool64ix2) -> bool { -- x.0 != 0 || x.1 != 0 -- } -- #[inline] -- pub fn bool64fx2_all(x: bool64fx2) -> bool { -- x.0 != 0 && x.1 != 0 -- } -- #[inline] -- pub fn bool64fx2_any(x: bool64fx2) -> bool { -- x.0 != 0 || x.1 != 0 -- }} --bool_impls! { -- bool64ix2: bool64i, i64x2, i64, 2, bool64ix2_all, bool64ix2_any, x0 | x1 -- [/// Convert `self` to a boolean vector for interacting with floating point vectors. 
-- to_f -> bool64fx2]; -- -- bool64fx2: bool64f, i64x2, i64, 2, bool64fx2_all, bool64fx2_any, x0 | x1 -- [/// Convert `self` to a boolean vector for interacting with integer vectors. -- to_i -> bool64ix2]; --} -- --impl u64x2 { -- /// Convert each lane to a signed integer. -- #[inline] -- pub fn to_i64(self) -> i64x2 { -- unsafe {simd_cast(self)} -- } -- /// Convert each lane to a 64-bit float. -- #[inline] -- pub fn to_f64(self) -> f64x2 { -- unsafe {simd_cast(self)} -- } --} --impl i64x2 { -- /// Convert each lane to an unsigned integer. -- #[inline] -- pub fn to_u64(self) -> u64x2 { -- unsafe {simd_cast(self)} -- } -- /// Convert each lane to a 64-bit float. -- #[inline] -- pub fn to_f64(self) -> f64x2 { -- unsafe {simd_cast(self)} -- } --} --impl f64x2 { -- /// Convert each lane to a signed integer. -- #[inline] -- pub fn to_i64(self) -> i64x2 { -- unsafe {simd_cast(self)} -- } -- /// Convert each lane to an unsigned integer. -- #[inline] -- pub fn to_u64(self) -> u64x2 { -- unsafe {simd_cast(self)} -- } -- -- /// Convert each lane to a 32-bit float. -- #[inline] -- pub fn to_f32(self) -> f32x4 { -- unsafe { -- let x: f32x2 = simd_cast(self); -- f32x4::new(x.0, x.1, 0.0, 0.0) -- } -- } --} -- --neg_impls!{ -- 0, -- i64x2, --} --neg_impls! { -- 0.0, -- f64x2, --} --macro_rules! not_impls { -- ($($ty: ident,)*) => { -- $(impl ops::Not for $ty { -- type Output = Self; -- fn not(self) -> Self { -- $ty::splat(!0) ^ self -- } -- })* -- } --} --not_impls! { -- i64x2, -- u64x2, --} -- --macro_rules! operators { -- ($($trayt: ident ($func: ident, $method: ident): $($ty: ty),*;)*) => { -- $( -- $(impl ops::$trayt for $ty { -- type Output = Self; -- #[inline] -- fn $method(self, x: Self) -> Self { -- unsafe {$func(self, x)} -- } -- })* -- )* -- } --} --operators! 
{ -- Add (simd_add, add): -- i64x2, u64x2, -- f64x2; -- Sub (simd_sub, sub): -- i64x2, u64x2, -- f64x2; -- Mul (simd_mul, mul): -- i64x2, u64x2, -- f64x2; -- Div (simd_div, div): f64x2; -- -- BitAnd (simd_and, bitand): -- i64x2, u64x2, -- bool64ix2, -- bool64fx2; -- BitOr (simd_or, bitor): -- i64x2, u64x2, -- bool64ix2, -- bool64fx2; -- BitXor (simd_xor, bitxor): -- i64x2, u64x2, -- bool64ix2, -- bool64fx2; --} -- --macro_rules! shift_one { ($ty: ident, $($by: ident),*) => { -- $( -- impl ops::Shl<$by> for $ty { -- type Output = Self; -- #[inline] -- fn shl(self, other: $by) -> Self { -- unsafe { simd_shl(self, $ty::splat(other as <$ty as Simd>::Elem)) } -- } -- } -- impl ops::Shr<$by> for $ty { -- type Output = Self; -- #[inline] -- fn shr(self, other: $by) -> Self { -- unsafe {simd_shr(self, $ty::splat(other as <$ty as Simd>::Elem))} -- } -- } -- )* -- } --} -- --macro_rules! shift { -- ($($ty: ident),*) => { -- $(shift_one! { -- $ty, -- u8, u16, u32, u64, usize, -- i8, i16, i32, i64, isize -- })* -- } --} --shift! 
{ -- i64x2, u64x2 --} -diff --git a/third_party/rust/simd/src/v256.rs b/third_party/rust/simd/src/v256.rs -deleted file mode 100644 -index 519eb14e7259..000000000000 ---- a/third_party/rust/simd/src/v256.rs -+++ /dev/null -@@ -1,436 +0,0 @@ --#![allow(dead_code)] --use core::{mem,ops}; --#[allow(unused_imports)] --use super::{ -- Simd, -- u32x4, i32x4, u16x8, i16x8, u8x16, i8x16, f32x4, -- bool32ix4, bool16ix8, bool8ix16, bool32fx4, -- simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge, -- simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16, -- simd_insert, simd_extract, -- simd_cast, -- simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor, -- bool8i, bool16i, bool32i, bool32f, -- Unalign, bitcast, --}; --use super::sixty_four::*; --#[cfg(all(target_feature = "avx"))] --use super::x86::avx::common; -- --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct u64x4(u64, u64, u64, u64); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct i64x4(i64, i64, i64, i64); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct f64x4(f64, f64, f64, f64); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool64ix4(i64, i64, i64, i64); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool64fx4(i64, i64, i64, i64); -- --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct u32x8(u32, u32, u32, u32, -- u32, u32, u32, u32); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct i32x8(i32, i32, i32, i32, -- i32, i32, i32, i32); --#[repr(simd)] 
--#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct f32x8(f32, f32, f32, f32, -- f32, f32, f32, f32); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool32ix8(i32, i32, i32, i32, -- i32, i32, i32, i32);#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool32fx8(i32, i32, i32, i32, -- i32, i32, i32, i32); -- --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct u16x16(u16, u16, u16, u16, u16, u16, u16, u16, -- u16, u16, u16, u16, u16, u16, u16, u16); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct i16x16(i16, i16, i16, i16, i16, i16, i16, i16, -- i16, i16, i16, i16, i16, i16, i16, i16); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool16ix16(i16, i16, i16, i16, i16, i16, i16, i16, -- i16, i16, i16, i16, i16, i16, i16, i16); -- --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct u8x32(u8, u8, u8, u8, u8, u8, u8, u8, -- u8, u8, u8, u8, u8, u8, u8, u8, -- u8, u8, u8, u8, u8, u8, u8, u8, -- u8, u8, u8, u8, u8, u8, u8, u8); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct i8x32(i8, i8, i8, i8, i8, i8, i8, i8, -- i8, i8, i8, i8, i8, i8, i8, i8, -- i8, i8, i8, i8, i8, i8, i8, i8, -- i8, i8, i8, i8, i8, i8, i8, i8); --#[repr(simd)] --#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] --#[derive(Debug, Copy)] --pub struct bool8ix32(i8, i8, i8, i8, i8, i8, i8, i8, -- i8, i8, i8, i8, i8, i8, i8, i8, -- i8, i8, i8, i8, i8, i8, i8, i8, -- i8, i8, i8, i8, i8, i8, i8, i8); -- --simd! 
{ -- bool8ix32: i8x32 = i8, u8x32 = u8, bool8ix32 = bool8i; -- bool16ix16: i16x16 = i16, u16x16 = u16, bool16ix16 = bool16i; -- bool32ix8: i32x8 = i32, u32x8 = u32, bool32ix8 = bool32i; -- bool64ix4: i64x4 = i64, u64x4 = u64, bool64ix4 = bool64i; -- -- bool32fx8: f32x8 = f32, bool32fx8 = bool32f; -- bool64fx4: f64x4 = f64, bool64fx4 = bool64f; --} -- --basic_impls! { -- u64x4: u64, bool64ix4, simd_shuffle4, 4, x0, x1 | x2, x3; -- i64x4: i64, bool64ix4, simd_shuffle4, 4, x0, x1 | x2, x3; -- f64x4: f64, bool64fx4, simd_shuffle4, 4, x0, x1 | x2, x3; -- -- u32x8: u32, bool32ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; -- i32x8: i32, bool32ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; -- f32x8: f32, bool32fx8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; -- -- u16x16: u16, bool16ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; -- i16x16: i16, bool16ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; -- -- u8x32: u8, bool8ix32, simd_shuffle32, 32, x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | -- x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31; -- i8x32: i8, bool8ix32, simd_shuffle32, 32, x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | -- x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31; --} -- --#[cfg(all(not(target_feature = "avx")))] --#[doc(hidden)] --mod common { -- use super::*; -- // implementation via SSE vectors -- macro_rules! bools { -- ($($ty: ty, $all: ident, $any: ident;)*) => { -- $( -- #[inline] -- pub fn $all(x: $ty) -> bool { -- x.low().all() && x.high().all() -- } -- #[inline] -- pub fn $any(x: $ty) -> bool { -- x.low().any() || x.high().any() -- } -- )* -- } -- } -- -- bools! 
{ -- bool64ix4, bool64ix4_all, bool64ix4_any; -- bool64fx4, bool64fx4_all, bool64fx4_any; -- bool32ix8, bool32ix8_all, bool32ix8_any; -- bool32fx8, bool32fx8_all, bool32fx8_any; -- bool16ix16, bool16ix16_all, bool16ix16_any; -- bool8ix32, bool8ix32_all, bool8ix32_any; -- } -- --} -- --bool_impls! { -- bool64ix4: bool64i, i64x4, i64, 4, bool64ix4_all, bool64ix4_any, x0, x1 | x2, x3 -- [/// Convert `self` to a boolean vector for interacting with floating point vectors. -- to_f -> bool64fx4]; -- -- bool64fx4: bool64f, i64x4, i64, 4, bool64fx4_all, bool64fx4_any, x0, x1 | x2, x3 -- [/// Convert `self` to a boolean vector for interacting with integer vectors. -- to_i -> bool64ix4]; -- -- bool32ix8: bool32i, i32x8, i32, 8, bool32ix8_all, bool32ix8_any, x0, x1, x2, x3 | x4, x5, x6, x7 -- [/// Convert `self` to a boolean vector for interacting with floating point vectors. -- to_f -> bool32fx8]; -- -- bool32fx8: bool32f, i32x8, i32, 8, bool32fx8_all, bool32fx8_any, x0, x1, x2, x3 | x4, x5, x6, x7 -- [/// Convert `self` to a boolean vector for interacting with integer vectors. -- to_i -> bool32ix8]; -- -- bool16ix16: bool16i, i16x16, i16, 16, bool16ix16_all, bool16ix16_any, -- x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15 []; -- -- bool8ix32: bool8i, i8x32, i8, 32, bool8ix32_all, bool8ix32_any, -- x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | -- x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 []; --} -- --pub trait LowHigh128 { -- type Half: Simd; -- /// Extract the low 128 bit part. -- fn low(self) -> Self::Half; -- /// Extract the high 128 bit part. -- fn high(self) -> Self::Half; --} -- --macro_rules! expr { ($x:expr) => ($x) } // HACK --macro_rules! low_high_impls { -- ($( -- $name: ident, $half: ident, $($first: tt),+ ... 
$($last: tt),+; -- )*) => { -- $(impl LowHigh128 for $name { -- type Half = $half; -- #[inline] -- fn low(self) -> Self::Half { -- $half::new($( expr!(self.$first), )*) -- } -- -- #[inline] -- fn high(self) -> Self::Half { -- $half::new($( expr!(self.$last), )*) -- } -- })* -- } --} -- --low_high_impls! { -- u64x4, u64x2, 0, 1 ... 2, 3; -- i64x4, i64x2, 0, 1 ... 2, 3; -- f64x4, f64x2, 0, 1 ... 2, 3; -- -- u32x8, u32x4, 0, 1, 2, 3 ... 4, 5, 6, 7; -- i32x8, i32x4, 0, 1, 2, 3 ... 4, 5, 6, 7; -- f32x8, f32x4, 0, 1, 2, 3 ... 4, 5, 6, 7; -- -- u16x16, u16x8, 0, 1, 2, 3, 4, 5, 6, 7 ... 8, 9, 10, 11, 12, 13, 14, 15; -- i16x16, i16x8, 0, 1, 2, 3, 4, 5, 6, 7 ... 8, 9, 10, 11, 12, 13, 14, 15; -- -- u8x32, u8x16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ... -- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31; -- i8x32, i8x16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ... -- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31; -- --} -- --macro_rules! bool_low_high_impls { -- ($( -- $name: ident: $half: ident; -- )*) => { -- $(impl LowHigh128 for $name { -- type Half = $half; -- /// Extract the low 128 bit part. -- #[inline] -- fn low(self) -> Self::Half { -- Self::Half::from_repr(self.to_repr().low()) -- } -- -- /// Extract the high 128 bit part. -- #[inline] -- fn high(self) -> Self::Half { -- Self::Half::from_repr(self.to_repr().high()) -- } -- })* -- } --} -- --bool_low_high_impls! { -- bool64fx4: bool64fx2; -- bool32fx8: bool32fx4; -- -- bool64ix4: bool64ix2; -- bool32ix8: bool32ix4; -- bool16ix16: bool16ix8; -- bool8ix32: bool8ix16; --} -- --impl u64x4 { -- /// Convert each lane to a signed integer. -- #[inline] -- pub fn to_i64(self) -> i64x4 { -- unsafe {simd_cast(self)} -- } -- /// Convert each lane to a 64-bit float. -- #[inline] -- pub fn to_f64(self) -> f64x4 { -- unsafe {simd_cast(self)} -- } --} -- --impl i64x4 { -- /// Convert each lane to an unsigned integer. 
-- #[inline] -- pub fn to_u64(self) -> u64x4 { -- unsafe {simd_cast(self)} -- } -- /// Convert each lane to a 64-bit float. -- #[inline] -- pub fn to_f64(self) -> f64x4 { -- unsafe {simd_cast(self)} -- } --} -- --impl f64x4 { -- /// Convert each lane to a signed integer. -- #[inline] -- pub fn to_i64(self) -> i64x4 { -- unsafe {simd_cast(self)} -- } -- /// Convert each lane to an unsigned integer. -- #[inline] -- pub fn to_u64(self) -> u64x4 { -- unsafe {simd_cast(self)} -- } --} -- --impl u32x8 { -- /// Convert each lane to a signed integer. -- #[inline] -- pub fn to_i32(self) -> i32x8 { -- unsafe {simd_cast(self)} -- } -- /// Convert each lane to a 32-bit float. -- #[inline] -- pub fn to_f32(self) -> f32x8 { -- unsafe {simd_cast(self)} -- } --} -- --impl i32x8 { -- /// Convert each lane to an unsigned integer. -- #[inline] -- pub fn to_u32(self) -> u32x8 { -- unsafe {simd_cast(self)} -- } -- /// Convert each lane to a 32-bit float. -- #[inline] -- pub fn to_f32(self) -> f32x8 { -- unsafe {simd_cast(self)} -- } --} -- --impl f32x8 { -- /// Convert each lane to a signed integer. -- #[inline] -- pub fn to_i32(self) -> i32x8 { -- unsafe {simd_cast(self)} -- } -- /// Convert each lane to an unsigned integer. -- #[inline] -- pub fn to_u32(self) -> u32x8 { -- unsafe {simd_cast(self)} -- } --} -- --impl i16x16 { -- /// Convert each lane to an unsigned integer. -- #[inline] -- pub fn to_u16(self) -> u16x16 { -- unsafe {simd_cast(self)} -- } --} -- --impl u16x16 { -- /// Convert each lane to a signed integer. -- #[inline] -- pub fn to_i16(self) -> i16x16 { -- unsafe {simd_cast(self)} -- } --} -- --impl i8x32 { -- /// Convert each lane to an unsigned integer. -- #[inline] -- pub fn to_u8(self) -> u8x32 { -- unsafe {simd_cast(self)} -- } --} -- --impl u8x32 { -- /// Convert each lane to a signed integer. -- #[inline] -- pub fn to_i8(self) -> i8x32 { -- unsafe {simd_cast(self)} -- } --} -- --operators! 
{ -- Add (simd_add, add): -- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, -- f64x4, f32x8; -- Sub (simd_sub, sub): -- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, -- f64x4, f32x8; -- Mul (simd_mul, mul): -- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, -- f64x4, f32x8; -- Div (simd_div, div): f64x4, f32x8; -- -- BitAnd (simd_and, bitand): -- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, -- bool64ix4, bool32ix8, bool16ix16, -- bool64fx4, bool32fx8; -- BitOr (simd_or, bitor): -- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, -- bool64ix4, bool32ix8, bool16ix16, -- bool64fx4, bool32fx8; -- BitXor (simd_xor, bitxor): -- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, -- bool64ix4, bool32ix8, bool16ix16, -- bool64fx4, bool32fx8; --} -- --neg_impls!{ -- 0, -- i64x4, -- i32x8, -- i16x16, -- i8x32, --} -- --neg_impls! { -- 0.0, -- f64x4, -- f32x8, --} -- --not_impls! { -- i64x4, -- u64x4, -- i32x8, -- u32x8, -- i16x16, -- u16x16, -- i8x32, -- u8x32, --} -- --shift! 
{ -- i64x4, -- u64x4, -- i32x8, -- u32x8, -- i16x16, -- u16x16, -- i8x32, -- u8x32 --} -diff --git a/third_party/rust/simd/src/x86/avx.rs b/third_party/rust/simd/src/x86/avx.rs -deleted file mode 100644 -index 180247e36561..000000000000 ---- a/third_party/rust/simd/src/x86/avx.rs -+++ /dev/null -@@ -1,290 +0,0 @@ --use super::super::*; --use sixty_four::*; -- --use super::super::bitcast; -- --pub use v256::{ -- f64x4, bool64fx4, u64x4, i64x4, bool64ix4, -- f32x8, bool32fx8, u32x8, i32x8, bool32ix8, -- u16x16, i16x16, bool16ix16, -- u8x32, i8x32, bool8ix32, -- LowHigh128 --}; -- --#[allow(dead_code)] --extern "platform-intrinsic" { -- fn x86_mm256_addsub_ps(x: f32x8, y: f32x8) -> f32x8; -- fn x86_mm256_addsub_pd(x: f64x4, y: f64x4) -> f64x4; -- fn x86_mm256_dp_ps(x: f32x8, y: f32x8, z: i32) -> f32x8; -- fn x86_mm256_hadd_ps(x: f32x8, y: f32x8) -> f32x8; -- fn x86_mm256_hadd_pd(x: f64x4, y: f64x4) -> f64x4; -- fn x86_mm256_hsub_ps(x: f32x8, y: f32x8) -> f32x8; -- fn x86_mm256_hsub_pd(x: f64x4, y: f64x4) -> f64x4; -- fn x86_mm256_max_ps(x: f32x8, y: f32x8) -> f32x8; -- fn x86_mm256_max_pd(x: f64x4, y: f64x4) -> f64x4; -- fn x86_mm256_min_ps(x: f32x8, y: f32x8) -> f32x8; -- fn x86_mm256_min_pd(x: f64x4, y: f64x4) -> f64x4; -- fn x86_mm256_movemask_ps(x: f32x8) -> i32; -- fn x86_mm256_movemask_pd(x: f64x4) -> i32; -- fn x86_mm_permutevar_ps(x: f32x4, y: i32x4) -> f32x4; -- fn x86_mm_permutevar_pd(x: f64x2, y: i64x2) -> f64x2; -- fn x86_mm256_permutevar_ps(x: f32x8, y: i32x8) -> f32x8; -- fn x86_mm256_permutevar_pd(x: f64x4, y: i64x4) -> f64x4; -- fn x86_mm256_rcp_ps(x: f32x8) -> f32x8; -- fn x86_mm256_rsqrt_ps(x: f32x8) -> f32x8; -- fn x86_mm256_sqrt_ps(x: f32x8) -> f32x8; -- fn x86_mm256_sqrt_pd(x: f64x4) -> f64x4; -- fn x86_mm_testc_ps(x: f32x4, y: f32x4) -> i32; -- fn x86_mm256_testc_ps(x: f32x8, y: f32x8) -> i32; -- fn x86_mm_testc_pd(x: f64x2, y: f64x2) -> i32; -- fn x86_mm256_testc_pd(x: f64x4, y: f64x4) -> i32; -- fn x86_mm256_testc_si256(x: u64x4, y: u64x4) -> 
i32; -- fn x86_mm_testnzc_ps(x: f32x4, y: f32x4) -> i32; -- fn x86_mm256_testnzc_ps(x: f32x8, y: f32x8) -> i32; -- fn x86_mm_testnzc_pd(x: f64x2, y: f64x2) -> i32; -- fn x86_mm256_testnzc_pd(x: f64x4, y: f64x4) -> i32; -- fn x86_mm256_testnzc_si256(x: u64x4, y: u64x4) -> i32; -- fn x86_mm_testz_ps(x: f32x4, y: f32x4) -> i32; -- fn x86_mm256_testz_ps(x: f32x8, y: f32x8) -> i32; -- fn x86_mm_testz_pd(x: f64x2, y: f64x2) -> i32; -- fn x86_mm256_testz_pd(x: f64x4, y: f64x4) -> i32; -- fn x86_mm256_testz_si256(x: u64x4, y: u64x4) -> i32; --} -- --#[doc(hidden)] --pub mod common { -- use super::*; -- use core::mem; -- -- macro_rules! bools { -- ($($ty: ty, $all: ident, $any: ident, $testc: ident, $testz: ident;)*) => { -- $( -- #[inline] -- pub fn $all(x: $ty) -> bool { -- unsafe { -- super::$testc(mem::transmute(x), mem::transmute(<$ty>::splat(true))) != 0 -- } -- } -- #[inline] -- pub fn $any(x: $ty) -> bool { -- unsafe { -- super::$testz(mem::transmute(x), mem::transmute(x)) == 0 -- } -- } -- )* -- } -- } -- -- bools! 
{ -- bool32fx8, bool32fx8_all, bool32fx8_any, x86_mm256_testc_ps, x86_mm256_testz_ps; -- bool64fx4, bool64fx4_all, bool64fx4_any, x86_mm256_testc_pd, x86_mm256_testz_pd; -- bool8ix32, bool8ix32_all, bool8ix32_any, x86_mm256_testc_si256, x86_mm256_testz_si256; -- bool16ix16, bool16ix16_all, bool16ix16_any, x86_mm256_testc_si256, x86_mm256_testz_si256; -- bool32ix8, bool32ix8_all, bool32ix8_any, x86_mm256_testc_si256, x86_mm256_testz_si256; -- bool64ix4, bool64ix4_all, bool64ix4_any, x86_mm256_testc_si256, x86_mm256_testz_si256; -- } --} -- --// 128-bit vectors: -- --// 32 bit floats -- --pub trait AvxF32x4 { -- fn permutevar(self, other: i32x4) -> f32x4; --} --impl AvxF32x4 for f32x4 { -- fn permutevar(self, other: i32x4) -> f32x4 { -- unsafe { x86_mm_permutevar_ps(self, other) } -- } --} -- --pub trait AvxF64x4 { -- fn sqrt(self) -> Self; -- fn addsub(self, other: Self) -> Self; -- fn hadd(self, other: Self) -> Self; -- fn hsub(self, other: Self) -> Self; -- fn max(self, other: Self) -> Self; -- fn min(self, other: Self) -> Self; -- fn move_mask(self) -> u32; --} -- --impl AvxF64x4 for f64x4 { -- #[inline] -- fn sqrt(self) -> Self { -- unsafe { x86_mm256_sqrt_pd(self) } -- } -- -- #[inline] -- fn addsub(self, other: Self) -> Self { -- unsafe { x86_mm256_addsub_pd(self, other) } -- } -- -- #[inline] -- fn hadd(self, other: Self) -> Self { -- unsafe { x86_mm256_hadd_pd(self, other) } -- } -- -- #[inline] -- fn hsub(self, other: Self) -> Self { -- unsafe { x86_mm256_hsub_pd(self, other) } -- } -- -- #[inline] -- fn max(self, other: Self) -> Self { -- unsafe { x86_mm256_max_pd(self, other) } -- } -- -- #[inline] -- fn min(self, other: Self) -> Self { -- unsafe { x86_mm256_min_pd(self, other) } -- } -- -- #[inline] -- fn move_mask(self) -> u32 { -- unsafe { x86_mm256_movemask_pd(self) as u32 } -- } --} -- --pub trait AvxBool64fx4 { -- fn move_mask(self) -> u32; --} --impl AvxBool64fx4 for bool64fx4 { -- #[inline] -- fn move_mask(self) -> u32 { -- unsafe { 
x86_mm256_movemask_pd(bitcast(self)) as u32 } -- } --} -- --pub trait AvxF32x8 { -- fn sqrt(self) -> Self; -- fn addsub(self, other: Self) -> Self; -- fn hadd(self, other: Self) -> Self; -- fn hsub(self, other: Self) -> Self; -- fn max(self, other: Self) -> Self; -- fn min(self, other: Self) -> Self; -- fn move_mask(self) -> u32; -- /// Compute an approximation to the reciprocal of the square root -- /// of `self`, that is, `f32x8::splat(1.0) / self.sqrt()`. -- /// -- /// The accuracy of this approximation is platform dependent. -- fn approx_rsqrt(self) -> Self; -- /// Compute an approximation to the reciprocal of `self`, that is, -- /// `f32x8::splat(1.0) / self`. -- /// -- /// The accuracy of this approximation is platform dependent. -- fn approx_reciprocal(self) -> Self; --} -- --impl AvxF32x8 for f32x8 { -- #[inline] -- fn sqrt(self) -> Self { -- unsafe { x86_mm256_sqrt_ps(self) } -- } -- -- #[inline] -- fn addsub(self, other: Self) -> Self { -- unsafe { x86_mm256_addsub_ps(self, other) } -- } -- -- #[inline] -- fn hadd(self, other: Self) -> Self { -- unsafe { x86_mm256_hadd_ps(self, other) } -- } -- -- #[inline] -- fn hsub(self, other: Self) -> Self { -- unsafe { x86_mm256_hsub_ps(self, other) } -- } -- -- #[inline] -- fn max(self, other: Self) -> Self { -- unsafe { x86_mm256_max_ps(self, other) } -- } -- -- #[inline] -- fn min(self, other: Self) -> Self { -- unsafe { x86_mm256_min_ps(self, other) } -- } -- -- #[inline] -- fn move_mask(self) -> u32 { -- unsafe { x86_mm256_movemask_ps(self) as u32 } -- } -- -- #[inline] -- fn approx_reciprocal(self) -> Self { -- unsafe { x86_mm256_rcp_ps(self) } -- } -- -- #[inline] -- fn approx_rsqrt(self) -> Self { -- unsafe { x86_mm256_rsqrt_ps(self) } -- } --} -- --pub trait AvxBool32fx8 { -- fn move_mask(self) -> u32; --} --impl AvxBool32fx8 for bool32fx8 { -- #[inline] -- fn move_mask(self) -> u32 { -- unsafe { x86_mm256_movemask_ps(bitcast(self)) as u32 } -- } --} -- --pub trait AvxBool32fx4 {} --impl AvxBool32fx4 for 
bool32fx4 {} -- --// 64 bit floats -- --pub trait AvxF64x2 { -- fn permutevar(self, other: i64x2) -> f64x2; --} --impl AvxF64x2 for f64x2 { -- fn permutevar(self, other: i64x2) -> f64x2 { -- unsafe { x86_mm_permutevar_pd(self, other) } -- } --} -- --pub trait AvxBool64fx2 {} --impl AvxBool64fx2 for bool64fx2 {} -- --// 64 bit integers -- --pub trait AvxU64x2 {} --impl AvxU64x2 for u64x2 {} --pub trait AvxI64x2 {} --impl AvxI64x2 for i64x2 {} -- --pub trait AvxBool64ix2 {} --impl AvxBool64ix2 for bool64ix2 {} -- --// 32 bit integers -- --pub trait AvxU32x4 {} --impl AvxU32x4 for u32x4 {} --pub trait AvxI32x4 {} --impl AvxI32x4 for i32x4 {} -- --pub trait AvxBool32ix4 {} --impl AvxBool32ix4 for bool32ix4 {} -- --// 16 bit integers -- --pub trait AvxU16x8 {} --impl AvxU16x8 for u16x8 {} --pub trait AvxI16x8 {} --impl AvxI16x8 for i16x8 {} -- --pub trait AvxBool16ix8 {} --impl AvxBool16ix8 for bool16ix8 {} -- --// 8 bit integers -- --pub trait AvxU8x16 {} --impl AvxU8x16 for u8x16 {} --pub trait AvxI8x16 {} --impl AvxI8x16 for i8x16 {} -- --pub trait AvxBool8ix16 {} --impl AvxBool8ix16 for bool8ix16 {} -diff --git a/third_party/rust/simd/src/x86/avx2.rs b/third_party/rust/simd/src/x86/avx2.rs -deleted file mode 100644 -index e86a33d3b5bb..000000000000 ---- a/third_party/rust/simd/src/x86/avx2.rs -+++ /dev/null -@@ -1,65 +0,0 @@ --use x86::avx::*; -- --#[allow(dead_code)] --extern "platform-intrinsic" { -- fn x86_mm256_abs_epi8(x: i8x32) -> i8x32; -- fn x86_mm256_abs_epi16(x: i16x16) -> i16x16; -- fn x86_mm256_abs_epi32(x: i32x8) -> i32x8; -- fn x86_mm256_adds_epi8(x: i8x32, y: i8x32) -> i8x32; -- fn x86_mm256_adds_epu8(x: u8x32, y: u8x32) -> u8x32; -- fn x86_mm256_adds_epi16(x: i16x16, y: i16x16) -> i16x16; -- fn x86_mm256_adds_epu16(x: u16x16, y: u16x16) -> u16x16; -- fn x86_mm256_avg_epu8(x: u8x32, y: u8x32) -> u8x32; -- fn x86_mm256_avg_epu16(x: u16x16, y: u16x16) -> u16x16; -- fn x86_mm256_hadd_epi16(x: i16x16, y: i16x16) -> i16x16; -- fn x86_mm256_hadd_epi32(x: 
i32x8, y: i32x8) -> i32x8; -- fn x86_mm256_hadds_epi16(x: i16x16, y: i16x16) -> i16x16; -- fn x86_mm256_hsub_epi16(x: i16x16, y: i16x16) -> i16x16; -- fn x86_mm256_hsub_epi32(x: i32x8, y: i32x8) -> i32x8; -- fn x86_mm256_hsubs_epi16(x: i16x16, y: i16x16) -> i16x16; -- fn x86_mm256_madd_epi16(x: i16x16, y: i16x16) -> i32x8; -- fn x86_mm256_maddubs_epi16(x: i8x32, y: i8x32) -> i16x16; -- fn x86_mm256_max_epi8(x: i8x32, y: i8x32) -> i8x32; -- fn x86_mm256_max_epu8(x: u8x32, y: u8x32) -> u8x32; -- fn x86_mm256_max_epi16(x: i16x16, y: i16x16) -> i16x16; -- fn x86_mm256_max_epu16(x: u16x16, y: u16x16) -> u16x16; -- fn x86_mm256_max_epi32(x: i32x8, y: i32x8) -> i32x8; -- fn x86_mm256_max_epu32(x: u32x8, y: u32x8) -> u32x8; -- fn x86_mm256_min_epi8(x: i8x32, y: i8x32) -> i8x32; -- fn x86_mm256_min_epu8(x: u8x32, y: u8x32) -> u8x32; -- fn x86_mm256_min_epi16(x: i16x16, y: i16x16) -> i16x16; -- fn x86_mm256_min_epu16(x: u16x16, y: u16x16) -> u16x16; -- fn x86_mm256_min_epi32(x: i32x8, y: i32x8) -> i32x8; -- fn x86_mm256_min_epu32(x: u32x8, y: u32x8) -> u32x8; -- fn x86_mm256_mul_epi64(x: i32x8, y: i32x8) -> i64x4; -- fn x86_mm256_mul_epu64(x: u32x8, y: u32x8) -> u64x4; -- fn x86_mm256_mulhi_epi16(x: i16x16, y: i16x16) -> i16x16; -- fn x86_mm256_mulhi_epu16(x: u16x16, y: u16x16) -> u16x16; -- fn x86_mm256_mulhrs_epi16(x: i16x16, y: i16x16) -> i16x16; -- fn x86_mm256_packs_epi16(x: i16x16, y: i16x16) -> i8x32; -- fn x86_mm256_packus_epi16(x: i16x16, y: i16x16) -> u8x32; -- fn x86_mm256_packs_epi32(x: i32x8, y: i32x8) -> i16x16; -- fn x86_mm256_packus_epi32(x: i32x8, y: i32x8) -> u16x16; -- fn x86_mm256_permutevar8x32_epi32(x: i32x8, y: i32x8) -> i32x8; -- fn x86_mm256_permutevar8x32_ps(x: f32x8, y: i32x8) -> f32x8; -- fn x86_mm256_sad_epu8(x: u8x32, y: u8x32) -> u64x4; -- fn x86_mm256_shuffle_epi8(x: i8x32, y: i8x32) -> i8x32; -- fn x86_mm256_sign_epi8(x: i8x32, y: i8x32) -> i8x32; -- fn x86_mm256_sign_epi16(x: i16x16, y: i16x16) -> i16x16; -- fn x86_mm256_sign_epi32(x: i32x8, 
y: i32x8) -> i32x8; -- fn x86_mm256_subs_epi8(x: i8x32, y: i8x32) -> i8x32; -- fn x86_mm256_subs_epu8(x: u8x32, y: u8x32) -> u8x32; -- fn x86_mm256_subs_epi16(x: i16x16, y: i16x16) -> i16x16; -- fn x86_mm256_subs_epu16(x: u16x16, y: u16x16) -> u16x16; --} -- --// broken on rustc 1.7.0-nightly (1ddaf8bdf 2015-12-12) --// pub trait Avx2F32x8 { --// fn permutevar(self, other: i32x8) -> f32x8; --// } --// --// impl Avx2F32x8 for f32x8 { --// fn permutevar(self, other: i32x8) -> f32x8 { --// unsafe { x86_mm256_permutevar8x32_ps(self, other) } --// } --// } -diff --git a/third_party/rust/simd/src/x86/mod.rs b/third_party/rust/simd/src/x86/mod.rs -deleted file mode 100644 -index 8763fb16ccfd..000000000000 ---- a/third_party/rust/simd/src/x86/mod.rs -+++ /dev/null -@@ -1,16 +0,0 @@ --//! Features specific to x86 and x86-64 CPUs. -- --#[cfg(any(feature = "doc", target_feature = "sse2"))] --pub mod sse2; --#[cfg(any(feature = "doc", target_feature = "sse3"))] --pub mod sse3; --#[cfg(any(feature = "doc", target_feature = "ssse3"))] --pub mod ssse3; --#[cfg(any(feature = "doc", target_feature = "sse4.1"))] --pub mod sse4_1; --#[cfg(any(feature = "doc", target_feature = "sse4.2"))] --pub mod sse4_2; --#[cfg(any(feature = "doc", target_feature = "avx"))] --pub mod avx; --#[cfg(any(feature = "doc", target_feature = "avx2"))] --pub mod avx2; -diff --git a/third_party/rust/simd/src/x86/sse2.rs b/third_party/rust/simd/src/x86/sse2.rs -deleted file mode 100644 -index 5cbc853694d5..000000000000 ---- a/third_party/rust/simd/src/x86/sse2.rs -+++ /dev/null -@@ -1,359 +0,0 @@ --use super::super::*; --use {bitcast, simd_cast, f32x2}; -- --pub use sixty_four::{f64x2, i64x2, u64x2, bool64ix2, bool64fx2}; -- --//pub use super::{u64x2, i64x2, f64x2, bool64ix2, bool64fx2}; -- --// strictly speaking, these are SSE instructions, not SSE2. 
--extern "platform-intrinsic" { -- fn x86_mm_movemask_ps(x: f32x4) -> i32; -- fn x86_mm_max_ps(x: f32x4, y: f32x4) -> f32x4; -- fn x86_mm_min_ps(x: f32x4, y: f32x4) -> f32x4; -- fn x86_mm_rsqrt_ps(x: f32x4) -> f32x4; -- fn x86_mm_rcp_ps(x: f32x4) -> f32x4; -- fn x86_mm_sqrt_ps(x: f32x4) -> f32x4; --} -- --extern "platform-intrinsic" { -- fn x86_mm_adds_epi8(x: i8x16, y: i8x16) -> i8x16; -- fn x86_mm_adds_epu8(x: u8x16, y: u8x16) -> u8x16; -- fn x86_mm_adds_epi16(x: i16x8, y: i16x8) -> i16x8; -- fn x86_mm_adds_epu16(x: u16x8, y: u16x8) -> u16x8; -- fn x86_mm_avg_epu8(x: u8x16, y: u8x16) -> u8x16; -- fn x86_mm_avg_epu16(x: u16x8, y: u16x8) -> u16x8; -- fn x86_mm_madd_epi16(x: i16x8, y: i16x8) -> i32x4; -- fn x86_mm_max_epi16(x: i16x8, y: i16x8) -> i16x8; -- fn x86_mm_max_epu8(x: u8x16, y: u8x16) -> u8x16; -- fn x86_mm_max_pd(x: f64x2, y: f64x2) -> f64x2; -- fn x86_mm_min_epi16(x: i16x8, y: i16x8) -> i16x8; -- fn x86_mm_min_epu8(x: u8x16, y: u8x16) -> u8x16; -- fn x86_mm_min_pd(x: f64x2, y: f64x2) -> f64x2; -- fn x86_mm_movemask_pd(x: f64x2) -> i32; -- fn x86_mm_movemask_epi8(x: i8x16) -> i32; -- fn x86_mm_mul_epu32(x: u32x4, y: u32x4) -> u64x2; -- fn x86_mm_mulhi_epi16(x: i16x8, y: i16x8) -> i16x8; -- fn x86_mm_mulhi_epu16(x: u16x8, y: u16x8) -> u16x8; -- fn x86_mm_packs_epi16(x: i16x8, y: i16x8) -> i8x16; -- fn x86_mm_packs_epi32(x: i32x4, y: i32x4) -> i16x8; -- fn x86_mm_packus_epi16(x: i16x8, y: i16x8) -> u8x16; -- fn x86_mm_sad_epu8(x: u8x16, y: u8x16) -> u64x2; -- fn x86_mm_sqrt_pd(x: f64x2) -> f64x2; -- fn x86_mm_subs_epi8(x: i8x16, y: i8x16) -> i8x16; -- fn x86_mm_subs_epu8(x: u8x16, y: u8x16) -> u8x16; -- fn x86_mm_subs_epi16(x: i16x8, y: i16x8) -> i16x8; -- fn x86_mm_subs_epu16(x: u16x8, y: u16x8) -> u16x8; --} -- --#[doc(hidden)] --pub mod common { -- use super::super::super::*; -- use core::mem; -- -- #[inline] -- pub fn f32x4_sqrt(x: f32x4) -> f32x4 { -- unsafe {super::x86_mm_sqrt_ps(x)} -- } -- #[inline] -- pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 { 
-- unsafe {super::x86_mm_rsqrt_ps(x)} -- } -- #[inline] -- pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 { -- unsafe {super::x86_mm_rcp_ps(x)} -- } -- #[inline] -- pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 { -- unsafe {super::x86_mm_max_ps(x, y)} -- } -- #[inline] -- pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 { -- unsafe {super::x86_mm_min_ps(x, y)} -- } -- -- macro_rules! bools { -- ($($ty: ty, $all: ident, $any: ident, $movemask: ident, $width: expr;)*) => { -- $( -- #[inline] -- pub fn $all(x: $ty) -> bool { -- unsafe { -- super::$movemask(mem::transmute(x)) == (1 << $width) - 1 -- } -- } -- #[inline] -- pub fn $any(x: $ty) -> bool { -- unsafe { -- super::$movemask(mem::transmute(x)) != 0 -- } -- } -- )* -- } -- } -- -- bools! { -- bool32fx4, bool32fx4_all, bool32fx4_any, x86_mm_movemask_ps, 4; -- bool8ix16, bool8ix16_all, bool8ix16_any, x86_mm_movemask_epi8, 16; -- bool16ix8, bool16ix8_all, bool16ix8_any, x86_mm_movemask_epi8, 16; -- bool32ix4, bool32ix4_all, bool32ix4_any, x86_mm_movemask_epi8, 16; -- } --} -- --// 32 bit floats -- --pub trait Sse2F32x4 { -- fn to_f64(self) -> f64x2; -- fn move_mask(self) -> u32; --} --impl Sse2F32x4 for f32x4 { -- #[inline] -- fn to_f64(self) -> f64x2 { -- unsafe { -- simd_cast(f32x2(self.0, self.1)) -- } -- } -- fn move_mask(self) -> u32 { -- unsafe {x86_mm_movemask_ps(self) as u32} -- } --} --pub trait Sse2Bool32fx4 { -- fn move_mask(self) -> u32; --} --impl Sse2Bool32fx4 for bool32fx4 { -- #[inline] -- fn move_mask(self) -> u32 { -- unsafe { x86_mm_movemask_ps(bitcast(self)) as u32} -- } --} -- --// 64 bit floats -- --pub trait Sse2F64x2 { -- fn move_mask(self) -> u32; -- fn sqrt(self) -> Self; -- fn max(self, other: Self) -> Self; -- fn min(self, other: Self) -> Self; --} --impl Sse2F64x2 for f64x2 { -- #[inline] -- fn move_mask(self) -> u32 { -- unsafe { x86_mm_movemask_pd(bitcast(self)) as u32} -- } -- -- #[inline] -- fn sqrt(self) -> Self { -- unsafe { x86_mm_sqrt_pd(self) } -- } -- -- #[inline] -- fn 
max(self, other: Self) -> Self { -- unsafe { x86_mm_max_pd(self, other) } -- } -- #[inline] -- fn min(self, other: Self) -> Self { -- unsafe { x86_mm_min_pd(self, other) } -- } --} -- --pub trait Sse2Bool64fx2 { -- fn move_mask(self) -> u32; --} --impl Sse2Bool64fx2 for bool64fx2 { -- #[inline] -- fn move_mask(self) -> u32 { -- unsafe { x86_mm_movemask_pd(bitcast(self)) as u32} -- } --} -- --// 64 bit ints -- --pub trait Sse2U64x2 {} --impl Sse2U64x2 for u64x2 {} -- --pub trait Sse2I64x2 {} --impl Sse2I64x2 for i64x2 {} -- --pub trait Sse2Bool64ix2 {} --impl Sse2Bool64ix2 for bool64ix2 {} -- --// 32 bit ints -- --pub trait Sse2U32x4 { -- fn low_mul(self, other: Self) -> u64x2; --} --impl Sse2U32x4 for u32x4 { -- #[inline] -- fn low_mul(self, other: Self) -> u64x2 { -- unsafe { x86_mm_mul_epu32(self, other) } -- } --} -- --pub trait Sse2I32x4 { -- fn packs(self, other: Self) -> i16x8; --} --impl Sse2I32x4 for i32x4 { -- #[inline] -- fn packs(self, other: Self) -> i16x8 { -- unsafe { x86_mm_packs_epi32(self, other) } -- } --} -- --pub trait Sse2Bool32ix4 {} --impl Sse2Bool32ix4 for bool32ix4 {} -- --// 16 bit ints -- --pub trait Sse2U16x8 { -- fn adds(self, other: Self) -> Self; -- fn subs(self, other: Self) -> Self; -- fn avg(self, other: Self) -> Self; -- fn mulhi(self, other: Self) -> Self; --} --impl Sse2U16x8 for u16x8 { -- #[inline] -- fn adds(self, other: Self) -> Self { -- unsafe { x86_mm_adds_epu16(self, other) } -- } -- #[inline] -- fn subs(self, other: Self) -> Self { -- unsafe { x86_mm_subs_epu16(self, other) } -- } -- -- #[inline] -- fn avg(self, other: Self) -> Self { -- unsafe { x86_mm_avg_epu16(self, other) } -- } -- -- #[inline] -- fn mulhi(self, other: Self) -> Self { -- unsafe { x86_mm_mulhi_epu16(self, other) } -- } --} -- --pub trait Sse2I16x8 { -- fn adds(self, other: Self) -> Self; -- fn subs(self, other: Self) -> Self; -- fn madd(self, other: Self) -> i32x4; -- fn max(self, other: Self) -> Self; -- fn min(self, other: Self) -> Self; -- fn 
mulhi(self, other: Self) -> Self; -- fn packs(self, other: Self) -> i8x16; -- fn packus(self, other: Self) -> u8x16; --} --impl Sse2I16x8 for i16x8 { -- #[inline] -- fn adds(self, other: Self) -> Self { -- unsafe { x86_mm_adds_epi16(self, other) } -- } -- #[inline] -- fn subs(self, other: Self) -> Self { -- unsafe { x86_mm_subs_epi16(self, other) } -- } -- -- #[inline] -- fn madd(self, other: Self) -> i32x4 { -- unsafe { x86_mm_madd_epi16(self, other) } -- } -- -- #[inline] -- fn max(self, other: Self) -> Self { -- unsafe { x86_mm_max_epi16(self, other) } -- } -- #[inline] -- fn min(self, other: Self) -> Self { -- unsafe { x86_mm_min_epi16(self, other) } -- } -- -- #[inline] -- fn mulhi(self, other: Self) -> Self { -- unsafe { x86_mm_mulhi_epi16(self, other) } -- } -- -- #[inline] -- fn packs(self, other: Self) -> i8x16 { -- unsafe { x86_mm_packs_epi16(self, other) } -- } -- #[inline] -- fn packus(self, other: Self) -> u8x16 { -- unsafe { x86_mm_packus_epi16(self, other) } -- } --} -- --pub trait Sse2Bool16ix8 {} --impl Sse2Bool16ix8 for bool16ix8 {} -- --// 8 bit ints -- --pub trait Sse2U8x16 { -- fn move_mask(self) -> u32; -- fn adds(self, other: Self) -> Self; -- fn subs(self, other: Self) -> Self; -- fn avg(self, other: Self) -> Self; -- fn max(self, other: Self) -> Self; -- fn min(self, other: Self) -> Self; -- fn sad(self, other: Self) -> u64x2; --} --impl Sse2U8x16 for u8x16 { -- #[inline] -- fn move_mask(self) -> u32 { -- unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32} -- } -- -- #[inline] -- fn adds(self, other: Self) -> Self { -- unsafe { x86_mm_adds_epu8(self, other) } -- } -- #[inline] -- fn subs(self, other: Self) -> Self { -- unsafe { x86_mm_subs_epu8(self, other) } -- } -- -- #[inline] -- fn avg(self, other: Self) -> Self { -- unsafe { x86_mm_avg_epu8(self, other) } -- } -- -- #[inline] -- fn max(self, other: Self) -> Self { -- unsafe { x86_mm_max_epu8(self, other) } -- } -- #[inline] -- fn min(self, other: Self) -> Self { -- unsafe { 
x86_mm_min_epu8(self, other) } -- } -- -- #[inline] -- fn sad(self, other: Self) -> u64x2 { -- unsafe { x86_mm_sad_epu8(self, other) } -- } --} -- --pub trait Sse2I8x16 { -- fn move_mask(self) -> u32; -- fn adds(self, other: Self) -> Self; -- fn subs(self, other: Self) -> Self; --} --impl Sse2I8x16 for i8x16 { -- #[inline] -- fn move_mask(self) -> u32 { -- unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32} -- } -- -- #[inline] -- fn adds(self, other: Self) -> Self { -- unsafe { x86_mm_adds_epi8(self, other) } -- } -- #[inline] -- fn subs(self, other: Self) -> Self { -- unsafe { x86_mm_subs_epi8(self, other) } -- } --} -- --pub trait Sse2Bool8ix16 { -- fn move_mask(self) -> u32; --} --impl Sse2Bool8ix16 for bool8ix16 { -- #[inline] -- fn move_mask(self) -> u32 { -- unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32} -- } --} -diff --git a/third_party/rust/simd/src/x86/sse3.rs b/third_party/rust/simd/src/x86/sse3.rs -deleted file mode 100644 -index bd70b569f9c0..000000000000 ---- a/third_party/rust/simd/src/x86/sse3.rs -+++ /dev/null -@@ -1,57 +0,0 @@ --use sixty_four::*; --use super::super::*; -- --extern "platform-intrinsic" { -- fn x86_mm_addsub_ps(x: f32x4, y: f32x4) -> f32x4; -- fn x86_mm_addsub_pd(x: f64x2, y: f64x2) -> f64x2; -- fn x86_mm_hadd_ps(x: f32x4, y: f32x4) -> f32x4; -- fn x86_mm_hadd_pd(x: f64x2, y: f64x2) -> f64x2; -- fn x86_mm_hsub_ps(x: f32x4, y: f32x4) -> f32x4; -- fn x86_mm_hsub_pd(x: f64x2, y: f64x2) -> f64x2; --} -- --pub trait Sse3F32x4 { -- fn addsub(self, other: Self) -> Self; -- fn hadd(self, other: Self) -> Self; -- fn hsub(self, other: Self) -> Self; --} -- --impl Sse3F32x4 for f32x4 { -- #[inline] -- fn addsub(self, other: Self) -> Self { -- unsafe { x86_mm_addsub_ps(self, other) } -- } -- -- #[inline] -- fn hadd(self, other: Self) -> Self { -- unsafe { x86_mm_hadd_ps(self, other) } -- } -- -- #[inline] -- fn hsub(self, other: Self) -> Self { -- unsafe { x86_mm_hsub_ps(self, other) } -- } --} -- --pub trait Sse3F64x2 { -- fn 
addsub(self, other: Self) -> Self; -- fn hadd(self, other: Self) -> Self; -- fn hsub(self, other: Self) -> Self; --} -- --impl Sse3F64x2 for f64x2 { -- #[inline] -- fn addsub(self, other: Self) -> Self { -- unsafe { x86_mm_addsub_pd(self, other) } -- } -- -- #[inline] -- fn hadd(self, other: Self) -> Self { -- unsafe { x86_mm_hadd_pd(self, other) } -- } -- -- #[inline] -- fn hsub(self, other: Self) -> Self { -- unsafe { x86_mm_hsub_pd(self, other) } -- } --} -diff --git a/third_party/rust/simd/src/x86/sse4_1.rs b/third_party/rust/simd/src/x86/sse4_1.rs -deleted file mode 100644 -index fa44678a0584..000000000000 ---- a/third_party/rust/simd/src/x86/sse4_1.rs -+++ /dev/null -@@ -1,155 +0,0 @@ --use super::super::*; --use x86::sse2::*; -- --#[allow(dead_code)] --extern "platform-intrinsic" { -- fn x86_mm_dp_ps(x: f32x4, y: f32x4, z: i32) -> f32x4; -- fn x86_mm_dp_pd(x: f64x2, y: f64x2, z: i32) -> f64x2; -- fn x86_mm_max_epi8(x: i8x16, y: i8x16) -> i8x16; -- fn x86_mm_max_epu16(x: u16x8, y: u16x8) -> u16x8; -- fn x86_mm_max_epi32(x: i32x4, y: i32x4) -> i32x4; -- fn x86_mm_max_epu32(x: u32x4, y: u32x4) -> u32x4; -- fn x86_mm_min_epi8(x: i8x16, y: i8x16) -> i8x16; -- fn x86_mm_min_epu16(x: u16x8, y: u16x8) -> u16x8; -- fn x86_mm_min_epi32(x: i32x4, y: i32x4) -> i32x4; -- fn x86_mm_min_epu32(x: u32x4, y: u32x4) -> u32x4; -- fn x86_mm_minpos_epu16(x: u16x8) -> u16x8; -- fn x86_mm_mpsadbw_epu8(x: u8x16, y: u8x16, z: i32) -> u16x8; -- fn x86_mm_mul_epi32(x: i32x4, y: i32x4) -> i64x2; -- fn x86_mm_packus_epi32(x: i32x4, y: i32x4) -> u16x8; -- fn x86_mm_testc_si128(x: u64x2, y: u64x2) -> i32; -- fn x86_mm_testnzc_si128(x: u64x2, y: u64x2) -> i32; -- fn x86_mm_testz_si128(x: u64x2, y: u64x2) -> i32; --} -- --// 32 bit floats -- --pub trait Sse41F32x4 {} --impl Sse41F32x4 for f32x4 {} -- --// 64 bit floats -- --pub trait Sse41F64x2 {} --impl Sse41F64x2 for f64x2 {} -- --// 64 bit integers -- --pub trait Sse41U64x2 { -- fn testc(self, other: Self) -> i32; -- fn testnzc(self, 
other: Self) -> i32; -- fn testz(self, other: Self) -> i32; --} --impl Sse41U64x2 for u64x2 { -- #[inline] -- fn testc(self, other: Self) -> i32 { -- unsafe { x86_mm_testc_si128(self, other) } -- } -- #[inline] -- fn testnzc(self, other: Self) -> i32 { -- unsafe { x86_mm_testnzc_si128(self, other) } -- } -- #[inline] -- fn testz(self, other: Self) -> i32 { -- unsafe { x86_mm_testz_si128(self, other) } -- } --} --pub trait Sse41I64x2 {} --impl Sse41I64x2 for i64x2 {} -- --pub trait Sse41Bool64ix2 {} --impl Sse41Bool64ix2 for bool64ix2 {} -- --// 32 bit integers -- --pub trait Sse41U32x4 { -- fn max(self, other: Self) -> Self; -- fn min(self, other: Self) -> Self; --} --impl Sse41U32x4 for u32x4 { -- #[inline] -- fn max(self, other: Self) -> Self { -- unsafe { x86_mm_max_epu32(self, other) } -- } -- #[inline] -- fn min(self, other: Self) -> Self { -- unsafe { x86_mm_min_epu32(self, other) } -- } --} --pub trait Sse41I32x4 { -- fn max(self, other: Self) -> Self; -- fn min(self, other: Self) -> Self; -- fn low_mul(self, other: Self) -> i64x2; -- fn packus(self, other: Self) -> u16x8; --} --impl Sse41I32x4 for i32x4 { -- #[inline] -- fn max(self, other: Self) -> Self { -- unsafe { x86_mm_max_epi32(self, other) } -- } -- #[inline] -- fn min(self, other: Self) -> Self { -- unsafe { x86_mm_min_epi32(self, other) } -- } -- -- #[inline] -- fn low_mul(self, other: Self) -> i64x2 { -- unsafe { x86_mm_mul_epi32(self, other) } -- } -- #[inline] -- fn packus(self, other: Self) -> u16x8 { -- unsafe { x86_mm_packus_epi32(self, other) } -- } --} -- --pub trait Sse41Bool32ix4 {} --impl Sse41Bool32ix4 for bool32ix4 {} -- --// 16 bit integers -- --pub trait Sse41U16x8 { -- fn max(self, other: Self) -> Self; -- fn min(self, other: Self) -> Self; -- fn minpos(self) -> Self; --} --impl Sse41U16x8 for u16x8 { -- #[inline] -- fn max(self, other: Self) -> Self { -- unsafe { x86_mm_max_epu16(self, other) } -- } -- #[inline] -- fn min(self, other: Self) -> Self { -- unsafe { 
x86_mm_min_epu16(self, other) } -- } -- -- #[inline] -- fn minpos(self) -> Self { -- unsafe { x86_mm_minpos_epu16(self) } -- } --} --pub trait Sse41I16x8 {} --impl Sse41I16x8 for i16x8 {} -- --pub trait Sse41Bool16ix8 {} --impl Sse41Bool16ix8 for bool16ix8 {} -- --// 8 bit integers -- --pub trait Sse41U8x16 {} --impl Sse41U8x16 for u8x16 {} --pub trait Sse41I8x16 { -- fn max(self, other: Self) -> Self; -- fn min(self, other: Self) -> Self; --} --impl Sse41I8x16 for i8x16 { -- #[inline] -- fn max(self, other: Self) -> Self { -- unsafe { x86_mm_max_epi8(self, other) } -- } -- #[inline] -- fn min(self, other: Self) -> Self { -- unsafe { x86_mm_min_epi8(self, other) } -- } --} -- --pub trait Sse41Bool8ix16 {} --impl Sse41Bool8ix16 for bool8ix16 {} -diff --git a/third_party/rust/simd/src/x86/sse4_2.rs b/third_party/rust/simd/src/x86/sse4_2.rs -deleted file mode 100644 -index 5afe4583cf71..000000000000 ---- a/third_party/rust/simd/src/x86/sse4_2.rs -+++ /dev/null -@@ -1,19 +0,0 @@ --use i8x16; -- --#[allow(dead_code)] --extern "platform-intrinsic" { -- fn x86_mm_cmpestra(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; -- fn x86_mm_cmpestrc(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; -- fn x86_mm_cmpestri(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; -- fn x86_mm_cmpestrm(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i8x16; -- fn x86_mm_cmpestro(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; -- fn x86_mm_cmpestrs(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; -- fn x86_mm_cmpestrz(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; -- fn x86_mm_cmpistra(x: i8x16, y: i8x16, z: i32) -> i32; -- fn x86_mm_cmpistrc(x: i8x16, y: i8x16, z: i32) -> i32; -- fn x86_mm_cmpistri(x: i8x16, y: i8x16, z: i32) -> i32; -- fn x86_mm_cmpistrm(x: i8x16, y: i8x16, z: i32) -> i8x16; -- fn x86_mm_cmpistro(x: i8x16, y: i8x16, z: i32) -> i32; -- fn x86_mm_cmpistrs(x: i8x16, y: i8x16, z: i32) -> i32; -- fn x86_mm_cmpistrz(x: i8x16, y: i8x16, z: i32) -> i32; --} -diff 
--git a/third_party/rust/simd/src/x86/ssse3.rs b/third_party/rust/simd/src/x86/ssse3.rs -deleted file mode 100644 -index aa22a08a68a4..000000000000 ---- a/third_party/rust/simd/src/x86/ssse3.rs -+++ /dev/null -@@ -1,172 +0,0 @@ --use super::super::*; --use bitcast; -- --macro_rules! bitcast { -- ($func: ident($a: ident, $b: ident)) => { -- bitcast($func(bitcast($a), bitcast($b))) -- } --} -- --extern "platform-intrinsic" { -- fn x86_mm_abs_epi8(x: i8x16) -> i8x16; -- fn x86_mm_abs_epi16(x: i16x8) -> i16x8; -- fn x86_mm_abs_epi32(x: i32x4) -> i32x4; -- fn x86_mm_hadd_epi16(x: i16x8, y: i16x8) -> i16x8; -- fn x86_mm_hadd_epi32(x: i32x4, y: i32x4) -> i32x4; -- fn x86_mm_hadds_epi16(x: i16x8, y: i16x8) -> i16x8; -- fn x86_mm_hsub_epi16(x: i16x8, y: i16x8) -> i16x8; -- fn x86_mm_hsub_epi32(x: i32x4, y: i32x4) -> i32x4; -- fn x86_mm_hsubs_epi16(x: i16x8, y: i16x8) -> i16x8; -- fn x86_mm_maddubs_epi16(x: u8x16, y: i8x16) -> i16x8; -- fn x86_mm_mulhrs_epi16(x: i16x8, y: i16x8) -> i16x8; -- fn x86_mm_shuffle_epi8(x: i8x16, y: i8x16) -> i8x16; -- fn x86_mm_sign_epi8(x: i8x16, y: i8x16) -> i8x16; -- fn x86_mm_sign_epi16(x: i16x8, y: i16x8) -> i16x8; -- fn x86_mm_sign_epi32(x: i32x4, y: i32x4) -> i32x4; --} -- --// 32 bit integers -- --pub trait Ssse3I32x4 { -- fn abs(self) -> Self; -- fn hadd(self, other: Self) -> Self; -- fn hsub(self, other: Self) -> Self; -- fn sign(self, other: Self) -> Self; --} --impl Ssse3I32x4 for i32x4 { -- #[inline] -- fn abs(self) -> Self { -- unsafe { x86_mm_abs_epi32(self) } -- } -- -- #[inline] -- fn hadd(self, other: Self) -> Self { -- unsafe { x86_mm_hadd_epi32(self, other) } -- } -- #[inline] -- fn hsub(self, other: Self) -> Self { -- unsafe { x86_mm_hsub_epi32(self, other) } -- } -- -- #[inline] -- fn sign(self, other: Self) -> Self { -- unsafe { x86_mm_sign_epi32(self, other) } -- } --} -- --pub trait Ssse3U32x4 { -- fn hadd(self, other: Self) -> Self; -- fn hsub(self, other: Self) -> Self; --} --impl Ssse3U32x4 for u32x4 { -- #[inline] -- 
fn hadd(self, other: Self) -> Self { -- unsafe { bitcast!(x86_mm_hadd_epi32(self, other)) } -- } -- #[inline] -- fn hsub(self, other: Self) -> Self { -- unsafe { bitcast!(x86_mm_hsub_epi32(self, other)) } -- } --} -- --// 16 bit integers -- --pub trait Ssse3I16x8 { -- fn abs(self) -> Self; -- fn hadd(self, other: Self) -> Self; -- fn hadds(self, other: Self) -> Self; -- fn hsub(self, other: Self) -> Self; -- fn hsubs(self, other: Self) -> Self; -- fn sign(self, other: Self) -> Self; -- fn mulhrs(self, other: Self) -> Self; --} --impl Ssse3I16x8 for i16x8 { -- #[inline] -- fn abs(self) -> Self { -- unsafe { x86_mm_abs_epi16(self) } -- } -- -- #[inline] -- fn hadd(self, other: Self) -> Self { -- unsafe { x86_mm_hadd_epi16(self, other) } -- } -- #[inline] -- fn hadds(self, other: Self) -> Self { -- unsafe { x86_mm_hadds_epi16(self, other) } -- } -- #[inline] -- fn hsub(self, other: Self) -> Self { -- unsafe { x86_mm_hsub_epi16(self, other) } -- } -- #[inline] -- fn hsubs(self, other: Self) -> Self { -- unsafe { x86_mm_hsubs_epi16(self, other) } -- } -- -- #[inline] -- fn sign(self, other: Self) -> Self { -- unsafe { x86_mm_sign_epi16(self, other) } -- } -- -- #[inline] -- fn mulhrs(self, other: Self) -> Self { -- unsafe { x86_mm_mulhrs_epi16(self, other) } -- } --} -- --pub trait Ssse3U16x8 { -- fn hadd(self, other: Self) -> Self; -- fn hsub(self, other: Self) -> Self; --} --impl Ssse3U16x8 for u16x8 { -- #[inline] -- fn hadd(self, other: Self) -> Self { -- unsafe { bitcast!(x86_mm_hadd_epi16(self, other)) } -- } -- #[inline] -- fn hsub(self, other: Self) -> Self { -- unsafe { bitcast!(x86_mm_hsub_epi16(self, other)) } -- } --} -- -- --// 8 bit integers -- --pub trait Ssse3U8x16 { -- fn shuffle_bytes(self, indices: Self) -> Self; -- fn maddubs(self, other: i8x16) -> i16x8; --} -- --impl Ssse3U8x16 for u8x16 { -- #[inline] -- fn shuffle_bytes(self, indices: Self) -> Self { -- unsafe {bitcast!(x86_mm_shuffle_epi8(self, indices))} -- } -- -- fn maddubs(self, other: 
i8x16) -> i16x8 { -- unsafe { x86_mm_maddubs_epi16(self, other) } -- } --} -- --pub trait Ssse3I8x16 { -- fn abs(self) -> Self; -- fn shuffle_bytes(self, indices: Self) -> Self; -- fn sign(self, other: Self) -> Self; --} --impl Ssse3I8x16 for i8x16 { -- #[inline] -- fn abs(self) -> Self { -- unsafe {x86_mm_abs_epi8(self)} -- } -- #[inline] -- fn shuffle_bytes(self, indices: Self) -> Self { -- unsafe { -- x86_mm_shuffle_epi8(self, indices) -- } -- } -- -- #[inline] -- fn sign(self, other: Self) -> Self { -- unsafe { x86_mm_sign_epi8(self, other) } -- } --} -diff --git a/toolkit/moz.configure b/toolkit/moz.configure -index c2b3c768cba9..c3f3de62f09a 100644 ---- a/toolkit/moz.configure -+++ b/toolkit/moz.configure -@@ -696,14 +696,11 @@ set_config('MOZ_ENABLE_WEBRENDER', webrender.enable) - option('--enable-rust-simd', env='MOZ_RUST_SIMD', - help='Enable explicit SIMD in Rust code.') - --@depends('--enable-rust-simd', target, rustc_info) --def rust_simd(value, target, rustc_info): -- # As of 2018-06-05, the simd crate only works on aarch64, -- # armv7, x86 and x86_64. -+@depends('--enable-rust-simd', target) -+def rust_simd(value, target): -+ # As of 2019-03-04, the simd-accel feature of encoding_rs has not -+ # been properly set up outside aarch64, armv7, x86 and x86_64. - if target.cpu in ('aarch64', 'arm', 'x86', 'x86_64') and value: -- if rustc_info and rustc_info.version >= Version('1.33.0'): -- die('--enable-rust-simd does not work with Rust 1.33 or later. 
' -- 'See https://bugzilla.mozilla.org/show_bug.cgi?id=1521249 .') - return True - - set_config('MOZ_RUST_SIMD', rust_simd) --- -2.21.0 - diff --git a/libre/iceweasel/0001-bz-1521249.patch b/libre/iceweasel/0001-bz-1521249.patch new file mode 100644 index 000000000..fa5892b50 --- /dev/null +++ b/libre/iceweasel/0001-bz-1521249.patch @@ -0,0 +1,27006 @@ +From 3b9c2a82009be75797a67293fc70cec81a2ea179 Mon Sep 17 00:00:00 2001 +Message-Id: <3b9c2a82009be75797a67293fc70cec81a2ea179.1558465095.git.jan.steffens@gmail.com> +From: "Jan Alexander Steffens (heftig)" +Date: Tue, 21 May 2019 20:57:25 +0200 +Subject: [PATCH] bz 1521249 + +https://bugzilla.mozilla.org/show_bug.cgi?id=1521249 + +Squashed commit of the following: + +commit e5b0d808797892e60766beb225a2440162fedbb3 +Author: Henri Sivonen +Date: Sat Mar 23 14:17:35 2019 +0000 + + Bug 1521249 part 2 - Make packed_simd compile with Rust 1.32 and later. r=glandium + + This adds the Firefox-required build.rs hack and reverts the commit + that added bitmasks in a way that depends on Rust 1.34 compiler + internals. + + Depends on D20288 + + Differential Revision: https://phabricator.services.mozilla.com/D20289 + +commit 5ec872f24d15a671d36d4bce186fcaebcd27776c +Author: Henri Sivonen +Date: Sat Mar 23 14:17:35 2019 +0000 + + Bug 1521249 part 1 - Update encoding_rs to 0.8.16. r=glandium + + This changeset requires nightly Rust (to be changed in the next part). + + Depends on D21891 + + Differential Revision: https://phabricator.services.mozilla.com/D20288 + +commit d4181e4196f1f971b73a46e7ef814adbe7fa338a +Author: Henri Sivonen +Date: Sat Mar 23 14:17:35 2019 +0000 + + Bug 1521249 part 0 - Remove Rust version cap from --enable-rust-simd. 
r=glandium + + Differential Revision: https://phabricator.services.mozilla.com/D21891 +--- + .cargo/config.in | 5 + + Cargo.lock | 55 +- + Cargo.toml | 1 + + third_party/rust/cfg-if/.cargo-checksum.json | 2 +- + third_party/rust/cfg-if/Cargo.toml | 30 +- + third_party/rust/cfg-if/README.md | 18 +- + third_party/rust/cfg-if/src/lib.rs | 53 +- + .../rust/encoding_rs/.cargo-checksum.json | 2 +- + third_party/rust/encoding_rs/Cargo.toml | 12 +- + third_party/rust/encoding_rs/README.md | 50 +- + third_party/rust/encoding_rs/build.rs | 8 + + third_party/rust/encoding_rs/src/handles.rs | 2 +- + third_party/rust/encoding_rs/src/lib.rs | 7 +- + third_party/rust/encoding_rs/src/mem.rs | 24 +- + .../rust/encoding_rs/src/simd_funcs.rs | 93 +- + .../rust/encoding_rs/src/x_user_defined.rs | 7 +- + third_party/rust/packed_simd/.appveyor.yml | 59 + + .../rust/packed_simd/.cargo-checksum.json | 1 + + third_party/rust/packed_simd/.travis.yml | 308 ++++ + third_party/rust/packed_simd/Cargo.toml | 42 + + .../rust/{simd => packed_simd}/LICENSE-APACHE | 0 + .../rust/{simd => packed_simd}/LICENSE-MIT | 4 +- + third_party/rust/packed_simd/bors.toml | 3 + + third_party/rust/packed_simd/build.rs | 8 + + third_party/rust/packed_simd/ci/all.sh | 71 + + .../packed_simd/ci/android-install-ndk.sh | 37 + + .../packed_simd/ci/android-install-sdk.sh | 60 + + .../rust/packed_simd/ci/android-sysimage.sh | 56 + + third_party/rust/packed_simd/ci/benchmark.sh | 32 + + .../ci/deploy_and_run_on_ios_simulator.rs | 176 +++ + .../docker/aarch64-linux-android/Dockerfile | 47 + + .../aarch64-unknown-linux-gnu/Dockerfile | 14 + + .../docker/arm-linux-androideabi/Dockerfile | 47 + + .../arm-unknown-linux-gnueabi/Dockerfile | 15 + + .../arm-unknown-linux-gnueabihf/Dockerfile | 13 + + .../armv7-unknown-linux-gnueabihf/Dockerfile | 13 + + .../docker/i586-unknown-linux-gnu/Dockerfile | 7 + + .../docker/i686-unknown-linux-gnu/Dockerfile | 7 + + .../docker/mips-unknown-linux-gnu/Dockerfile | 13 + + 
.../mips64-unknown-linux-gnuabi64/Dockerfile | 10 + + .../Dockerfile | 10 + + .../mipsel-unknown-linux-musl/Dockerfile | 25 + + .../powerpc-unknown-linux-gnu/Dockerfile | 12 + + .../powerpc64-unknown-linux-gnu/Dockerfile | 17 + + .../powerpc64le-unknown-linux-gnu/Dockerfile | 11 + + .../docker/s390x-unknown-linux-gnu/Dockerfile | 20 + + .../sparc64-unknown-linux-gnu/Dockerfile | 18 + + .../thumbv7neon-linux-androideabi/Dockerfile | 47 + + .../Dockerfile | 13 + + .../docker/wasm32-unknown-unknown/Dockerfile | 37 + + .../ci/docker/x86_64-linux-android/Dockerfile | 29 + + .../Dockerfile | 16 + + .../x86_64-unknown-linux-gnu/Dockerfile | 10 + + third_party/rust/packed_simd/ci/dox.sh | 24 + + .../rust/packed_simd/ci/linux-s390x.sh | 18 + + .../rust/packed_simd/ci/linux-sparc64.sh | 17 + + third_party/rust/packed_simd/ci/lld-shim.rs | 11 + + .../rust/packed_simd/ci/max_line_width.sh | 17 + + third_party/rust/packed_simd/ci/run-docker.sh | 38 + + third_party/rust/packed_simd/ci/run.sh | 96 ++ + .../rust/packed_simd/ci/run_examples.sh | 51 + + .../rust/packed_simd/ci/runtest-android.rs | 45 + + .../rust/packed_simd/ci/setup_benchmarks.sh | 10 + + .../rust/packed_simd/ci/test-runner-linux | 24 + + third_party/rust/packed_simd/contributing.md | 67 + + .../rust/packed_simd/perf-guide/.gitignore | 1 + + .../rust/packed_simd/perf-guide/book.toml | 12 + + .../packed_simd/perf-guide/src/SUMMARY.md | 21 + + .../rust/packed_simd/perf-guide/src/ascii.css | 4 + + .../perf-guide/src/bound_checks.md | 22 + + .../perf-guide/src/float-math/approx.md | 8 + + .../perf-guide/src/float-math/fma.md | 6 + + .../perf-guide/src/float-math/fp.md | 3 + + .../perf-guide/src/float-math/svml.md | 7 + + .../perf-guide/src/introduction.md | 26 + + .../packed_simd/perf-guide/src/prof/linux.md | 107 ++ + .../packed_simd/perf-guide/src/prof/mca.md | 100 ++ + .../perf-guide/src/prof/profiling.md | 14 + + .../src/target-feature/attribute.md | 5 + + .../perf-guide/src/target-feature/features.md | 13 + + 
.../perf-guide/src/target-feature/inlining.md | 5 + + .../perf-guide/src/target-feature/practice.md | 31 + + .../perf-guide/src/target-feature/runtime.md | 5 + + .../src/target-feature/rustflags.md | 77 + + .../perf-guide/src/vert-hor-ops.md | 76 + + third_party/rust/packed_simd/readme.md | 182 +++ + third_party/rust/packed_simd/rustfmt.toml | 7 + + third_party/rust/packed_simd/src/api.rs | 301 ++++ + .../rust/packed_simd/src/api/bit_manip.rs | 128 ++ + third_party/rust/packed_simd/src/api/cast.rs | 108 ++ + .../rust/packed_simd/src/api/cast/macros.rs | 82 + + .../rust/packed_simd/src/api/cast/v128.rs | 79 + + .../rust/packed_simd/src/api/cast/v16.rs | 17 + + .../rust/packed_simd/src/api/cast/v256.rs | 81 + + .../rust/packed_simd/src/api/cast/v32.rs | 30 + + .../rust/packed_simd/src/api/cast/v512.rs | 68 + + .../rust/packed_simd/src/api/cast/v64.rs | 47 + + third_party/rust/packed_simd/src/api/cmp.rs | 16 + + .../rust/packed_simd/src/api/cmp/eq.rs | 27 + + .../rust/packed_simd/src/api/cmp/ord.rs | 43 + + .../packed_simd/src/api/cmp/partial_eq.rs | 67 + + .../packed_simd/src/api/cmp/partial_ord.rs | 234 +++ + .../rust/packed_simd/src/api/cmp/vertical.rs | 114 ++ + .../rust/packed_simd/src/api/default.rs | 28 + + third_party/rust/packed_simd/src/api/fmt.rs | 12 + + .../rust/packed_simd/src/api/fmt/binary.rs | 56 + + .../rust/packed_simd/src/api/fmt/debug.rs | 62 + + .../rust/packed_simd/src/api/fmt/lower_hex.rs | 56 + + .../rust/packed_simd/src/api/fmt/octal.rs | 56 + + .../rust/packed_simd/src/api/fmt/upper_hex.rs | 56 + + third_party/rust/packed_simd/src/api/from.rs | 7 + + .../packed_simd/src/api/from/from_array.rs | 121 ++ + .../packed_simd/src/api/from/from_vector.rs | 67 + + third_party/rust/packed_simd/src/api/hash.rs | 47 + + .../rust/packed_simd/src/api/into_bits.rs | 59 + + .../src/api/into_bits/arch_specific.rs | 190 +++ + .../packed_simd/src/api/into_bits/macros.rs | 74 + + .../packed_simd/src/api/into_bits/v128.rs | 28 + + 
.../rust/packed_simd/src/api/into_bits/v16.rs | 9 + + .../packed_simd/src/api/into_bits/v256.rs | 27 + + .../rust/packed_simd/src/api/into_bits/v32.rs | 13 + + .../packed_simd/src/api/into_bits/v512.rs | 27 + + .../rust/packed_simd/src/api/into_bits/v64.rs | 18 + + third_party/rust/packed_simd/src/api/math.rs | 4 + + .../rust/packed_simd/src/api/math/float.rs | 61 + + .../packed_simd/src/api/math/float/abs.rs | 31 + + .../packed_simd/src/api/math/float/consts.rs | 86 + + .../packed_simd/src/api/math/float/cos.rs | 44 + + .../packed_simd/src/api/math/float/exp.rs | 33 + + .../rust/packed_simd/src/api/math/float/ln.rs | 33 + + .../packed_simd/src/api/math/float/mul_add.rs | 44 + + .../src/api/math/float/mul_adde.rs | 48 + + .../packed_simd/src/api/math/float/powf.rs | 36 + + .../packed_simd/src/api/math/float/recpre.rs | 36 + + .../packed_simd/src/api/math/float/rsqrte.rs | 40 + + .../packed_simd/src/api/math/float/sin.rs | 50 + + .../packed_simd/src/api/math/float/sqrt.rs | 35 + + .../packed_simd/src/api/math/float/sqrte.rs | 44 + + .../rust/packed_simd/src/api/minimal.rs | 6 + + .../rust/packed_simd/src/api/minimal/iuf.rs | 167 ++ + .../rust/packed_simd/src/api/minimal/mask.rs | 174 +++ + .../rust/packed_simd/src/api/minimal/ptr.rs | 1385 +++++++++++++++++ + third_party/rust/packed_simd/src/api/ops.rs | 32 + + .../src/api/ops/scalar_arithmetic.rs | 203 +++ + .../packed_simd/src/api/ops/scalar_bitwise.rs | 162 ++ + .../src/api/ops/scalar_mask_bitwise.rs | 140 ++ + .../packed_simd/src/api/ops/scalar_shifts.rs | 107 ++ + .../src/api/ops/vector_arithmetic.rs | 148 ++ + .../packed_simd/src/api/ops/vector_bitwise.rs | 129 ++ + .../src/api/ops/vector_float_min_max.rs | 69 + + .../src/api/ops/vector_int_min_max.rs | 57 + + .../src/api/ops/vector_mask_bitwise.rs | 116 ++ + .../packed_simd/src/api/ops/vector_neg.rs | 43 + + .../packed_simd/src/api/ops/vector_rotates.rs | 90 ++ + .../packed_simd/src/api/ops/vector_shifts.rs | 107 ++ + 
third_party/rust/packed_simd/src/api/ptr.rs | 4 + + .../packed_simd/src/api/ptr/gather_scatter.rs | 241 +++ + .../rust/packed_simd/src/api/reductions.rs | 12 + + .../packed_simd/src/api/reductions/bitwise.rs | 151 ++ + .../src/api/reductions/float_arithmetic.rs | 312 ++++ + .../src/api/reductions/integer_arithmetic.rs | 197 +++ + .../packed_simd/src/api/reductions/mask.rs | 89 ++ + .../packed_simd/src/api/reductions/min_max.rs | 377 +++++ + .../rust/packed_simd/src/api/select.rs | 75 + + .../rust/packed_simd/src/api/shuffle.rs | 190 +++ + .../rust/packed_simd/src/api/shuffle1_dyn.rs | 159 ++ + third_party/rust/packed_simd/src/api/slice.rs | 7 + + .../packed_simd/src/api/slice/from_slice.rs | 216 +++ + .../src/api/slice/write_to_slice.rs | 211 +++ + .../rust/packed_simd/src/api/swap_bytes.rs | 192 +++ + third_party/rust/packed_simd/src/codegen.rs | 59 + + .../rust/packed_simd/src/codegen/bit_manip.rs | 354 +++++ + .../rust/packed_simd/src/codegen/llvm.rs | 99 ++ + .../rust/packed_simd/src/codegen/math.rs | 3 + + .../packed_simd/src/codegen/math/float.rs | 18 + + .../packed_simd/src/codegen/math/float/abs.rs | 103 ++ + .../packed_simd/src/codegen/math/float/cos.rs | 103 ++ + .../src/codegen/math/float/cos_pi.rs | 87 ++ + .../packed_simd/src/codegen/math/float/exp.rs | 112 ++ + .../packed_simd/src/codegen/math/float/ln.rs | 112 ++ + .../src/codegen/math/float/macros.rs | 559 +++++++ + .../src/codegen/math/float/mul_add.rs | 109 ++ + .../src/codegen/math/float/mul_adde.rs | 66 + + .../src/codegen/math/float/powf.rs | 112 ++ + .../packed_simd/src/codegen/math/float/sin.rs | 103 ++ + .../src/codegen/math/float/sin_cos_pi.rs | 195 +++ + .../src/codegen/math/float/sin_pi.rs | 87 ++ + .../src/codegen/math/float/sqrt.rs | 103 ++ + .../src/codegen/math/float/sqrte.rs | 67 + + .../src/codegen/pointer_sized_int.rs | 28 + + .../packed_simd/src/codegen/reductions.rs | 1 + + .../src/codegen/reductions/mask.rs | 69 + + .../src/codegen/reductions/mask/aarch64.rs | 71 + + 
.../src/codegen/reductions/mask/arm.rs | 54 + + .../src/codegen/reductions/mask/fallback.rs | 6 + + .../codegen/reductions/mask/fallback_impl.rs | 237 +++ + .../src/codegen/reductions/mask/x86.rs | 194 +++ + .../src/codegen/reductions/mask/x86/avx.rs | 101 ++ + .../src/codegen/reductions/mask/x86/avx2.rs | 35 + + .../src/codegen/reductions/mask/x86/sse.rs | 68 + + .../src/codegen/reductions/mask/x86/sse2.rs | 70 + + .../rust/packed_simd/src/codegen/shuffle.rs | 302 ++++ + .../packed_simd/src/codegen/shuffle1_dyn.rs | 432 +++++ + .../packed_simd/src/codegen/swap_bytes.rs | 189 +++ + .../rust/packed_simd/src/codegen/v128.rs | 46 + + .../rust/packed_simd/src/codegen/v16.rs | 7 + + .../rust/packed_simd/src/codegen/v256.rs | 78 + + .../rust/packed_simd/src/codegen/v32.rs | 11 + + .../rust/packed_simd/src/codegen/v512.rs | 145 ++ + .../rust/packed_simd/src/codegen/v64.rs | 21 + + .../rust/packed_simd/src/codegen/vPtr.rs | 33 + + .../rust/packed_simd/src/codegen/vSize.rs | 43 + + third_party/rust/packed_simd/src/lib.rs | 327 ++++ + third_party/rust/packed_simd/src/masks.rs | 128 ++ + third_party/rust/packed_simd/src/sealed.rs | 41 + + third_party/rust/packed_simd/src/testing.rs | 8 + + .../rust/packed_simd/src/testing/macros.rs | 44 + + .../rust/packed_simd/src/testing/utils.rs | 135 ++ + third_party/rust/packed_simd/src/v128.rs | 80 + + third_party/rust/packed_simd/src/v16.rs | 16 + + third_party/rust/packed_simd/src/v256.rs | 86 + + third_party/rust/packed_simd/src/v32.rs | 29 + + third_party/rust/packed_simd/src/v512.rs | 99 ++ + third_party/rust/packed_simd/src/v64.rs | 66 + + third_party/rust/packed_simd/src/vPtr.rs | 34 + + third_party/rust/packed_simd/src/vSize.rs | 53 + + .../rust/packed_simd/tests/endianness.rs | 262 ++++ + third_party/rust/simd/.cargo-checksum.json | 1 - + third_party/rust/simd/Cargo.toml | 37 - + third_party/rust/simd/README.md | 11 - + third_party/rust/simd/benches/mandelbrot.rs | 117 -- + third_party/rust/simd/benches/matrix.rs | 485 ------ + 
third_party/rust/simd/build.rs | 3 - + third_party/rust/simd/examples/axpy.rs | 65 - + third_party/rust/simd/examples/convert.rs | 38 - + third_party/rust/simd/examples/dot-product.rs | 60 - + .../simd/examples/fannkuch-redux-nosimd.rs | 156 -- + .../rust/simd/examples/fannkuch-redux.rs | 233 --- + third_party/rust/simd/examples/mandelbrot.rs | 125 -- + .../rust/simd/examples/matrix-inverse.rs | 281 ---- + .../rust/simd/examples/nbody-nosimd.rs | 156 -- + third_party/rust/simd/examples/nbody.rs | 170 -- + third_party/rust/simd/examples/ops.rs | 10 - + .../simd/examples/spectral-norm-nosimd.rs | 106 -- + .../rust/simd/examples/spectral-norm.rs | 74 - + third_party/rust/simd/src/aarch64/mod.rs | 3 - + third_party/rust/simd/src/aarch64/neon.rs | 681 -------- + third_party/rust/simd/src/arm/mod.rs | 4 - + third_party/rust/simd/src/arm/neon.rs | 622 -------- + third_party/rust/simd/src/common.rs | 520 ------- + third_party/rust/simd/src/lib.rs | 804 ---------- + third_party/rust/simd/src/sixty_four.rs | 228 --- + third_party/rust/simd/src/v256.rs | 436 ------ + third_party/rust/simd/src/x86/avx.rs | 290 ---- + third_party/rust/simd/src/x86/avx2.rs | 65 - + third_party/rust/simd/src/x86/mod.rs | 16 - + third_party/rust/simd/src/x86/sse2.rs | 359 ----- + third_party/rust/simd/src/x86/sse3.rs | 57 - + third_party/rust/simd/src/x86/sse4_1.rs | 155 -- + third_party/rust/simd/src/x86/sse4_2.rs | 19 - + third_party/rust/simd/src/x86/ssse3.rs | 172 -- + toolkit/moz.configure | 11 +- + 262 files changed, 17411 insertions(+), 6734 deletions(-) + create mode 100644 third_party/rust/packed_simd/.appveyor.yml + create mode 100644 third_party/rust/packed_simd/.cargo-checksum.json + create mode 100644 third_party/rust/packed_simd/.travis.yml + create mode 100644 third_party/rust/packed_simd/Cargo.toml + rename third_party/rust/{simd => packed_simd}/LICENSE-APACHE (100%) + rename third_party/rust/{simd => packed_simd}/LICENSE-MIT (93%) + create mode 100644 
third_party/rust/packed_simd/bors.toml + create mode 100644 third_party/rust/packed_simd/build.rs + create mode 100644 third_party/rust/packed_simd/ci/all.sh + create mode 100644 third_party/rust/packed_simd/ci/android-install-ndk.sh + create mode 100644 third_party/rust/packed_simd/ci/android-install-sdk.sh + create mode 100644 third_party/rust/packed_simd/ci/android-sysimage.sh + create mode 100644 third_party/rust/packed_simd/ci/benchmark.sh + create mode 100644 third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs + create mode 100644 third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile + 
create mode 100644 third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/dox.sh + create mode 100644 third_party/rust/packed_simd/ci/linux-s390x.sh + create mode 100644 third_party/rust/packed_simd/ci/linux-sparc64.sh + create mode 100644 third_party/rust/packed_simd/ci/lld-shim.rs + create mode 100644 third_party/rust/packed_simd/ci/max_line_width.sh + create mode 100644 third_party/rust/packed_simd/ci/run-docker.sh + create mode 100644 third_party/rust/packed_simd/ci/run.sh + create mode 100644 third_party/rust/packed_simd/ci/run_examples.sh + create mode 100644 third_party/rust/packed_simd/ci/runtest-android.rs + create mode 100644 third_party/rust/packed_simd/ci/setup_benchmarks.sh + create mode 100644 third_party/rust/packed_simd/ci/test-runner-linux + create mode 100644 third_party/rust/packed_simd/contributing.md + create mode 100644 third_party/rust/packed_simd/perf-guide/.gitignore + create mode 100644 third_party/rust/packed_simd/perf-guide/book.toml + create mode 100644 third_party/rust/packed_simd/perf-guide/src/SUMMARY.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/ascii.css + create mode 100644 third_party/rust/packed_simd/perf-guide/src/bound_checks.md + create mode 100644 
third_party/rust/packed_simd/perf-guide/src/float-math/approx.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/fma.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/fp.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/svml.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/introduction.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/linux.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/mca.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/profiling.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/features.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md + create mode 100644 third_party/rust/packed_simd/readme.md + create mode 100644 third_party/rust/packed_simd/rustfmt.toml + create mode 100644 third_party/rust/packed_simd/src/api.rs + create mode 100644 third_party/rust/packed_simd/src/api/bit_manip.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast/macros.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast/v128.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast/v16.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast/v256.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast/v32.rs + create mode 100644 
third_party/rust/packed_simd/src/api/cast/v512.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast/v64.rs + create mode 100644 third_party/rust/packed_simd/src/api/cmp.rs + create mode 100644 third_party/rust/packed_simd/src/api/cmp/eq.rs + create mode 100644 third_party/rust/packed_simd/src/api/cmp/ord.rs + create mode 100644 third_party/rust/packed_simd/src/api/cmp/partial_eq.rs + create mode 100644 third_party/rust/packed_simd/src/api/cmp/partial_ord.rs + create mode 100644 third_party/rust/packed_simd/src/api/cmp/vertical.rs + create mode 100644 third_party/rust/packed_simd/src/api/default.rs + create mode 100644 third_party/rust/packed_simd/src/api/fmt.rs + create mode 100644 third_party/rust/packed_simd/src/api/fmt/binary.rs + create mode 100644 third_party/rust/packed_simd/src/api/fmt/debug.rs + create mode 100644 third_party/rust/packed_simd/src/api/fmt/lower_hex.rs + create mode 100644 third_party/rust/packed_simd/src/api/fmt/octal.rs + create mode 100644 third_party/rust/packed_simd/src/api/fmt/upper_hex.rs + create mode 100644 third_party/rust/packed_simd/src/api/from.rs + create mode 100644 third_party/rust/packed_simd/src/api/from/from_array.rs + create mode 100644 third_party/rust/packed_simd/src/api/from/from_vector.rs + create mode 100644 third_party/rust/packed_simd/src/api/hash.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/macros.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v128.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v16.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v256.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v32.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v512.rs + create mode 100644 
third_party/rust/packed_simd/src/api/into_bits/v64.rs + create mode 100644 third_party/rust/packed_simd/src/api/math.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/abs.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/consts.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/cos.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/exp.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/ln.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/mul_add.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/mul_adde.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/powf.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/recpre.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/rsqrte.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/sin.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/sqrt.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/sqrte.rs + create mode 100644 third_party/rust/packed_simd/src/api/minimal.rs + create mode 100644 third_party/rust/packed_simd/src/api/minimal/iuf.rs + create mode 100644 third_party/rust/packed_simd/src/api/minimal/mask.rs + create mode 100644 third_party/rust/packed_simd/src/api/minimal/ptr.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs + create mode 100644 
third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_neg.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_rotates.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_shifts.rs + create mode 100644 third_party/rust/packed_simd/src/api/ptr.rs + create mode 100644 third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs + create mode 100644 third_party/rust/packed_simd/src/api/reductions.rs + create mode 100644 third_party/rust/packed_simd/src/api/reductions/bitwise.rs + create mode 100644 third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs + create mode 100644 third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs + create mode 100644 third_party/rust/packed_simd/src/api/reductions/mask.rs + create mode 100644 third_party/rust/packed_simd/src/api/reductions/min_max.rs + create mode 100644 third_party/rust/packed_simd/src/api/select.rs + create mode 100644 third_party/rust/packed_simd/src/api/shuffle.rs + create mode 100644 third_party/rust/packed_simd/src/api/shuffle1_dyn.rs + create mode 100644 third_party/rust/packed_simd/src/api/slice.rs + create mode 100644 third_party/rust/packed_simd/src/api/slice/from_slice.rs + create mode 100644 third_party/rust/packed_simd/src/api/slice/write_to_slice.rs + create mode 100644 third_party/rust/packed_simd/src/api/swap_bytes.rs + create mode 100644 third_party/rust/packed_simd/src/codegen.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/bit_manip.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/llvm.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math.rs + create mode 100644 
third_party/rust/packed_simd/src/codegen/math/float.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/abs.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/cos.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/exp.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/ln.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/macros.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/powf.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs + create mode 100644 
third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/shuffle.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/swap_bytes.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/v128.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/v16.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/v256.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/v32.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/v512.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/v64.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/vPtr.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/vSize.rs + create mode 100644 third_party/rust/packed_simd/src/lib.rs + create mode 100644 third_party/rust/packed_simd/src/masks.rs + create mode 100644 third_party/rust/packed_simd/src/sealed.rs + create mode 100644 third_party/rust/packed_simd/src/testing.rs + create mode 100644 third_party/rust/packed_simd/src/testing/macros.rs + create mode 100644 third_party/rust/packed_simd/src/testing/utils.rs + create mode 100644 third_party/rust/packed_simd/src/v128.rs + create mode 100644 third_party/rust/packed_simd/src/v16.rs + create mode 100644 third_party/rust/packed_simd/src/v256.rs + create mode 100644 third_party/rust/packed_simd/src/v32.rs + create mode 100644 third_party/rust/packed_simd/src/v512.rs + create mode 100644 third_party/rust/packed_simd/src/v64.rs + create mode 100644 third_party/rust/packed_simd/src/vPtr.rs + create mode 100644 third_party/rust/packed_simd/src/vSize.rs + create mode 100644 third_party/rust/packed_simd/tests/endianness.rs + 
delete mode 100644 third_party/rust/simd/.cargo-checksum.json + delete mode 100644 third_party/rust/simd/Cargo.toml + delete mode 100644 third_party/rust/simd/README.md + delete mode 100755 third_party/rust/simd/benches/mandelbrot.rs + delete mode 100755 third_party/rust/simd/benches/matrix.rs + delete mode 100644 third_party/rust/simd/build.rs + delete mode 100755 third_party/rust/simd/examples/axpy.rs + delete mode 100644 third_party/rust/simd/examples/convert.rs + delete mode 100755 third_party/rust/simd/examples/dot-product.rs + delete mode 100644 third_party/rust/simd/examples/fannkuch-redux-nosimd.rs + delete mode 100755 third_party/rust/simd/examples/fannkuch-redux.rs + delete mode 100755 third_party/rust/simd/examples/mandelbrot.rs + delete mode 100644 third_party/rust/simd/examples/matrix-inverse.rs + delete mode 100644 third_party/rust/simd/examples/nbody-nosimd.rs + delete mode 100755 third_party/rust/simd/examples/nbody.rs + delete mode 100644 third_party/rust/simd/examples/ops.rs + delete mode 100644 third_party/rust/simd/examples/spectral-norm-nosimd.rs + delete mode 100755 third_party/rust/simd/examples/spectral-norm.rs + delete mode 100644 third_party/rust/simd/src/aarch64/mod.rs + delete mode 100644 third_party/rust/simd/src/aarch64/neon.rs + delete mode 100644 third_party/rust/simd/src/arm/mod.rs + delete mode 100644 third_party/rust/simd/src/arm/neon.rs + delete mode 100644 third_party/rust/simd/src/common.rs + delete mode 100644 third_party/rust/simd/src/lib.rs + delete mode 100644 third_party/rust/simd/src/sixty_four.rs + delete mode 100644 third_party/rust/simd/src/v256.rs + delete mode 100644 third_party/rust/simd/src/x86/avx.rs + delete mode 100644 third_party/rust/simd/src/x86/avx2.rs + delete mode 100644 third_party/rust/simd/src/x86/mod.rs + delete mode 100644 third_party/rust/simd/src/x86/sse2.rs + delete mode 100644 third_party/rust/simd/src/x86/sse3.rs + delete mode 100644 third_party/rust/simd/src/x86/sse4_1.rs + delete mode 100644 
third_party/rust/simd/src/x86/sse4_2.rs + delete mode 100644 third_party/rust/simd/src/x86/ssse3.rs + +diff --git a/.cargo/config.in b/.cargo/config.in +index 4808857b5b5f..0773d70cf83a 100644 +--- a/.cargo/config.in ++++ b/.cargo/config.in +@@ -22,6 +22,11 @@ git = "https://github.com/glandium/cc-rs" + branch = "1.0.23-clang-cl-aarch64" + replace-with = "vendored-sources" + ++[source."https://github.com/rust-lang-nursery/packed_simd"] ++git = "https://github.com/hsivonen/packed_simd" ++branch = "rust_1_32" ++replace-with = "vendored-sources" ++ + [source.vendored-sources] + directory = '@top_srcdir@/third_party/rust' + +diff --git a/Cargo.lock b/Cargo.lock +index b2e2891dd828..bbbd00fbb5e4 100644 +--- a/Cargo.lock ++++ b/Cargo.lock +@@ -146,7 +146,7 @@ version = "0.3.9" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ + "backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)", +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-demangle 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (git+https://github.com/froydnj/winapi-rs?branch=aarch64)", +@@ -232,7 +232,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "cexpr 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "clang-sys 0.26.1 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.31.2 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +@@ -386,7 +386,7 @@ dependencies 
= [ + + [[package]] + name = "cfg-if" +-version = "0.1.2" ++version = "0.1.6" + source = "registry+https://github.com/rust-lang/crates.io-index" + + [[package]] +@@ -589,49 +589,49 @@ version = "0.3.1" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ + "arrayvec 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] + name = "crossbeam-epoch" + version = "0.4.3" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ + "arrayvec 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] + name = "crossbeam-utils" + version = "0.2.2" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] + name = "crossbeam-utils" + version = "0.3.2" + source = 
"registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] + name = "crossbeam-utils" + version = "0.6.3" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] +@@ -869,25 +869,25 @@ name = "encoding_c" + version = "0.9.0" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", ++ "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] + name = "encoding_glue" + version = "0.1.0" + dependencies = [ +- "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", ++ "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", + "nserror 0.1.0", + "nsstring 0.1.0", + ] + + [[package]] + name = "encoding_rs" +-version = "0.8.14" ++version = "0.8.16" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +- "simd 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ++ "packed_simd 0.3.3 (git+https://github.com/hsivonen/packed_simd?branch=rust_1_32)", + ] + + [[package]] +@@ -1524,7 +1524,7 @@ name = "log" + version = "0.4.6" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] +@@ -1789,7 +1789,7 @@ name = "net2" + 
version = "0.2.32" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (git+https://github.com/froydnj/winapi-rs?branch=aarch64)", + ] +@@ -1843,7 +1843,7 @@ name = "nsstring" + version = "0.1.0" + dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", +- "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", ++ "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] +@@ -1929,6 +1929,14 @@ dependencies = [ + "stable_deref_trait 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + ] + ++[[package]] ++name = "packed_simd" ++version = "0.3.3" ++source = "git+https://github.com/hsivonen/packed_simd?branch=rust_1_32#3541e3818fdc7c2a24f87e3459151a4ce955a67a" ++dependencies = [ ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ++] ++ + [[package]] + name = "parking_lot" + version = "0.6.3" +@@ -2442,11 +2450,6 @@ dependencies = [ + "opaque-debug 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + ] + +-[[package]] +-name = "simd" +-version = "0.2.3" +-source = "registry+https://github.com/rust-lang/crates.io-index" +- + [[package]] + name = "siphasher" + version = "0.2.1" +@@ -3040,7 +3043,7 @@ name = "uuid" + version = "0.6.5" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + ] + +@@ -3125,7 +3128,7 @@ dependencies = [ + "bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + 
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)", +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "core-foundation 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", + "core-graphics 0.17.1 (registry+https://github.com/rust-lang/crates.io-index)", + "core-text 13.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +@@ -3365,7 +3368,7 @@ dependencies = [ + "checksum cast 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "926013f2860c46252efceabb19f4a6b308197505082c609025aa6706c011d427" + "checksum cc 1.0.23 (git+https://github.com/glandium/cc-rs?branch=1.0.23-clang-cl-aarch64)" = "" + "checksum cexpr 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8fc0086be9ca82f7fc89fc873435531cb898b86e850005850de1f820e2db6e9b" +-"checksum cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de" ++"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" + "checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" + "checksum clang-sys 0.26.1 (registry+https://github.com/rust-lang/crates.io-index)" = "481e42017c1416b1c0856ece45658ecbb7c93d8a93455f7e5fa77f3b35455557" + "checksum clap 2.31.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f0f16b89cbb9ee36d87483dc939fe9f1e13c05898d56d7b230a0d4dff033a536" +@@ -3416,7 +3419,7 @@ dependencies = [ + "checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a" + "checksum ena 0.9.3 
(registry+https://github.com/rust-lang/crates.io-index)" = "88dc8393b3c7352f94092497f6b52019643e493b6b890eb417cdb7c46117e621" + "checksum encoding_c 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "769ecb8b33323998e482b218c0d13cd64c267609023b4b7ec3ee740714c318ee" +-"checksum encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7" ++"checksum encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)" = "0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73" + "checksum env_logger 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0561146661ae44c579e993456bc76d11ce1e0c7d745e57b2fa7146b6e49fa2ad" + "checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3" + "checksum euclid 0.19.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d1a7698bdda3d7444a79d33bdc96e8b518d44ea3ff101d8492a6ca1207b886ea" +@@ -3502,6 +3505,7 @@ dependencies = [ + "checksum ordered-float 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2f0015e9e8e28ee20c581cfbfe47c650cedeb9ed0721090e0b7ebb10b9cdbcc2" + "checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063" + "checksum owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13" ++"checksum packed_simd 0.3.3 (git+https://github.com/hsivonen/packed_simd?branch=rust_1_32)" = "" + "checksum parking_lot 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "69376b761943787ebd5cc85a5bc95958651a22609c5c1c2b65de21786baec72b" + "checksum parking_lot_core 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "4db1a8ccf734a7bce794cc19b3df06ed87ab2f3907036b693c68f56b4d4537fa" + "checksum 
peeking_take_while 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" +@@ -3557,7 +3561,6 @@ dependencies = [ + "checksum serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "44dd2cfde475037451fa99b7e5df77aa3cfd1536575fa8e7a538ab36dcde49ae" + "checksum sha2 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9eb6be24e4c23a84d7184280d2722f7f2731fcdd4a9d886efbfe4413e4847ea0" + "checksum sha2 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b4d8bfd0e469f417657573d8451fb33d16cfe0989359b93baf3a1ffc639543d" +-"checksum simd 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0048b17eb9577ac545c61d85c3559b41dfb4cbea41c9bd9ca6a4f73ff05fda84" + "checksum siphasher 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2ffc669b726f2bc9a3bcff66e5e23b56ba6bf70e22a34c3d7b6d0b3450b65b84" + "checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23" + "checksum slab 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5f9776d6b986f77b35c6cf846c11ad986ff128fe0b2b63a3628e3755e8d3102d" +diff --git a/Cargo.toml b/Cargo.toml +index c63c81e3409d..4a9d9212514a 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -61,3 +61,4 @@ libudev-sys = { path = "dom/webauthn/libudev-sys" } + serde_derive = { git = "https://github.com/servo/serde", branch = "deserialize_from_enums9" } + winapi = { git = "https://github.com/froydnj/winapi-rs", branch = "aarch64" } + cc = { git = "https://github.com/glandium/cc-rs", branch = "1.0.23-clang-cl-aarch64" } ++packed_simd = { git = "https://github.com/hsivonen/packed_simd", branch = "rust_1_32" } +diff --git a/third_party/rust/cfg-if/.cargo-checksum.json b/third_party/rust/cfg-if/.cargo-checksum.json +index 89b14a227216..b744a21d9fd1 100644 +--- a/third_party/rust/cfg-if/.cargo-checksum.json ++++ 
b/third_party/rust/cfg-if/.cargo-checksum.json +@@ -1 +1 @@ +-{"files":{"Cargo.toml":"764b9ce160653e841430da3919ff968b957ff811f7da42c8483c8bfc2f06be25","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"3fa9368c60bc701dea294fbacae0469188c4be1de79f82e972bb9b321776cd52","src/lib.rs":"6915169e3ca05f28e1cb0e052379d74f2496400de1240b74c56e55c2674a6560","tests/xcrate.rs":"30dcb70fbb9c96fda2b7825592558279f534776f72e2a8a0a3e26df4dedb3caa"},"package":"d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de"} +\ No newline at end of file ++{"files":{"Cargo.toml":"090d983ec20ad09e59f6b7679b48b9b54e9c0841cf2922b81cba485edcd40876","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"1cd0ebc3b30a9c9eddb0fda5515b5a52ec2b85a087328f0ee9f4d68cbb28afc2","src/lib.rs":"f02d6e295109365cf54884e5282a3e7d1e1f62857c700f23cd013e94a56bd803","tests/xcrate.rs":"30dcb70fbb9c96fda2b7825592558279f534776f72e2a8a0a3e26df4dedb3caa"},"package":"082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"} +\ No newline at end of file +diff --git a/third_party/rust/cfg-if/Cargo.toml b/third_party/rust/cfg-if/Cargo.toml +index 7afa063d1ef5..84c4fc7835ab 100644 +--- a/third_party/rust/cfg-if/Cargo.toml ++++ b/third_party/rust/cfg-if/Cargo.toml +@@ -1,14 +1,24 @@ ++# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO ++# ++# When uploading crates to the registry Cargo will automatically ++# "normalize" Cargo.toml files for maximal compatibility ++# with all versions of Cargo and also rewrite `path` dependencies ++# to registry (e.g. crates.io) dependencies ++# ++# If you believe there's an error in this file please file an ++# issue against the rust-lang/cargo repository. 
If you're ++# editing this file be aware that the upstream Cargo.toml ++# will likely look very different (and much more reasonable) ++ + [package] + name = "cfg-if" +-version = "0.1.2" ++version = "0.1.6" + authors = ["Alex Crichton "] +-license = "MIT/Apache-2.0" +-readme = "README.md" +-repository = "https://github.com/alexcrichton/cfg-if" ++description = "A macro to ergonomically define an item depending on a large number of #[cfg]\nparameters. Structured like an if-else chain, the first matching branch is the\nitem that gets emitted.\n" + homepage = "https://github.com/alexcrichton/cfg-if" +-documentation = "http://alexcrichton.com/cfg-if" +-description = """ +-A macro to ergonomically define an item depending on a large number of #[cfg] +-parameters. Structured like an if-else chain, the first matching branch is the +-item that gets emitted. +-""" ++documentation = "https://docs.rs/cfg-if" ++readme = "README.md" ++license = "MIT/Apache-2.0" ++repository = "https://github.com/alexcrichton/cfg-if" ++[badges.travis-ci] ++repository = "alexcrichton/cfg-if" +diff --git a/third_party/rust/cfg-if/README.md b/third_party/rust/cfg-if/README.md +index e9859dadb609..344a946c0487 100644 +--- a/third_party/rust/cfg-if/README.md ++++ b/third_party/rust/cfg-if/README.md +@@ -2,7 +2,7 @@ + + [![Build Status](https://travis-ci.org/alexcrichton/cfg-if.svg?branch=master)](https://travis-ci.org/alexcrichton/cfg-if) + +-[Documentation](http://alexcrichton.com/cfg-if) ++[Documentation](https://docs.rs/cfg-if) + + A macro to ergonomically define an item depending on a large number of #[cfg] + parameters. Structured like an if-else chain, the first matching branch is the +@@ -36,9 +36,17 @@ fn main() { + + # License + +-`cfg-if` is primarily distributed under the terms of both the MIT license and +-the Apache License (Version 2.0), with portions covered by various BSD-like +-licenses. ++This project is licensed under either of + +-See LICENSE-APACHE, and LICENSE-MIT for details. 
++ * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or ++ http://www.apache.org/licenses/LICENSE-2.0) ++ * MIT license ([LICENSE-MIT](LICENSE-MIT) or ++ http://opensource.org/licenses/MIT) + ++at your option. ++ ++### Contribution ++ ++Unless you explicitly state otherwise, any contribution intentionally submitted ++for inclusion in Serde by you, as defined in the Apache-2.0 license, shall be ++dual licensed as above, without any additional terms or conditions. +diff --git a/third_party/rust/cfg-if/src/lib.rs b/third_party/rust/cfg-if/src/lib.rs +index 563cda81f42d..ff144f69f862 100644 +--- a/third_party/rust/cfg-if/src/lib.rs ++++ b/third_party/rust/cfg-if/src/lib.rs +@@ -1,81 +1,90 @@ +-//! A macro for defining #[cfg] if-else statements. ++//! A macro for defining `#[cfg]` if-else statements. + //! + //! The macro provided by this crate, `cfg_if`, is similar to the `if/elif` C + //! preprocessor macro by allowing definition of a cascade of `#[cfg]` cases, + //! emitting the implementation which matches first. + //! +-//! This allows you to conveniently provide a long list #[cfg]'d blocks of code ++//! This allows you to conveniently provide a long list `#[cfg]`'d blocks of code + //! without having to rewrite each clause multiple times. + //! + //! # Example + //! + //! ``` + //! #[macro_use] + //! extern crate cfg_if; + //! + //! cfg_if! { + //! if #[cfg(unix)] { + //! fn foo() { /* unix specific functionality */ } + //! } else if #[cfg(target_pointer_width = "32")] { + //! fn foo() { /* non-unix, 32-bit functionality */ } + //! } else { + //! fn foo() { /* fallback implementation */ } + //! } + //! } + //! + //! # fn main() {} + //! ``` + + #![no_std] + +-#![doc(html_root_url = "http://alexcrichton.com/cfg-if")] ++#![doc(html_root_url = "https://docs.rs/cfg-if")] + #![deny(missing_docs)] + #![cfg_attr(test, deny(warnings))] + +-#[macro_export] ++#[macro_export(local_inner_macros)] + macro_rules! 
cfg_if { ++ // match if/else chains with a final `else` + ($( + if #[cfg($($meta:meta),*)] { $($it:item)* } + ) else * else { + $($it2:item)* + }) => { +- __cfg_if_items! { ++ cfg_if! { ++ @__items + () ; + $( ( ($($meta),*) ($($it)*) ), )* + ( () ($($it2)*) ), + } + }; ++ ++ // match if/else chains lacking a final `else` + ( + if #[cfg($($i_met:meta),*)] { $($i_it:item)* } + $( + else if #[cfg($($e_met:meta),*)] { $($e_it:item)* } + )* + ) => { +- __cfg_if_items! { ++ cfg_if! { ++ @__items + () ; + ( ($($i_met),*) ($($i_it)*) ), + $( ( ($($e_met),*) ($($e_it)*) ), )* + ( () () ), + } +- } +-} ++ }; + +-#[macro_export] +-#[doc(hidden)] +-macro_rules! __cfg_if_items { +- (($($not:meta,)*) ; ) => {}; +- (($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => { +- __cfg_if_apply! { cfg(all($($m,)* not(any($($not),*)))), $($it)* } +- __cfg_if_items! { ($($not,)* $($m,)*) ; $($rest)* } +- } +-} ++ // Internal and recursive macro to emit all the items ++ // ++ // Collects all the negated cfgs in a list at the beginning and after the ++ // semicolon is all the remaining items ++ (@__items ($($not:meta,)*) ; ) => {}; ++ (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => { ++ // Emit all items within one block, applying an approprate #[cfg]. The ++ // #[cfg] will require all `$m` matchers specified and must also negate ++ // all previous matchers. ++ cfg_if! { @__apply cfg(all($($m,)* not(any($($not),*)))), $($it)* } + +-#[macro_export] +-#[doc(hidden)] +-macro_rules! __cfg_if_apply { +- ($m:meta, $($it:item)*) => { ++ // Recurse to emit all other items in `$rest`, and when we do so add all ++ // our `$m` matchers to the list of `$not` matchers as future emissions ++ // will have to negate everything we just matched as well. ++ cfg_if! 
{ @__items ($($not,)* $($m,)*) ; $($rest)* } ++ }; ++ ++ // Internal macro to Apply a cfg attribute to a list of items ++ (@__apply $m:meta, $($it:item)*) => { + $(#[$m] $it)* +- } ++ }; + } + + #[cfg(test)] +diff --git a/third_party/rust/encoding_rs/.cargo-checksum.json b/third_party/rust/encoding_rs/.cargo-checksum.json +index c063d4c27534..7c1901dce515 100644 +--- a/third_party/rust/encoding_rs/.cargo-checksum.json ++++ b/third_party/rust/encoding_rs/.cargo-checksum.json +@@ -1 +1 @@ +-{"files":{"CONTRIBUTING.md":"06c26277e8dbd3f57be2eb51b5e3285dc1cbbf8c11326df413868ae702e6a61c","COPYRIGHT":"8b98376eb373dcf81950474efe34b5576a8171460dff500cc58a1ed8d160cd57","Cargo.toml":"f4c9b33981fe222ef322d640f5ef680828d75dcd534b8aa2bfdd576598deea64","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"f2ad48641d9c997d9ae3b95d93d1cd6e1ab12ab4c44de89937c7bfabbd076a4a","README.md":"ad140c9178067c8bdba8ae43ddffd0506d70d49474731247a050ff99a3ff7832","build.rs":"f5defca2c68b73e8723f489a9279af4fbe9724abc6e9abf58d32542e8a459e26","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf
66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd77776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a3942
3df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic.txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"92ddec35a834b6bc815fffffe6d07d9938a90d3c4526298637d8624410d83078","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"800cfbe3036d0c97ce27e07a4fd05edbcb7354ebec20903d81c76136d734931c","src/big5.rs":"1c94b35813314775c3fa1b10923cf8e8f8eba8c465d9833ad4293594e16c17f2","src/data.rs":"9544c019c7360a669bd3adaa90b70331124abd1df59841db66e74912bcdb96a5","src/euc_jp.rs":"0842e4f564a36051c6b85c47bbb652efae2f2926e91491daf77e4ceeecb18163","src/euc_kr.rs":"8e68590efa65485583bf57cae44ebf6de535bac1d37232e7f0307a38425fb992","src/gb18030.rs":"d269efb5e5d175f9d2ecf01d5606955a284b6f00749bb0ee23d3412c83aa3d59","src/handles.rs":"71aa7de1c5236a34ea0a8bb85332987751d2466b756fca6b3f6ac0da765cf91e","src/iso_2022_jp.rs":"3adc380736f24a5de36bc1cf81049bbe64473de10e6f12774195e6213c27c322","src/lib.rs":"e786de9e92e5652bc200266cf318753eea869e8971857cc0caa65a3cfe687545","src/macros.rs":"c7a019fd81d31de77569036ac36fd4e404b3f20144bbf79747faf4ea21538d09","src/mem.rs":"f412f60f2d4afb7e32ffba94dc5f93716e6ae9f065799ca17bb1f1b2145f6ee4","src/replacement.rs":"182c2093a6edb162183ca5990554fd7b199d3011924a8d80d894ba98ee7c479e","src/shift_jis.rs":"1c0c69ba6c123fcf720276646074660193bf9e6fa4327fe0d739a3e67874e081","src/simd_funcs.rs":"565ceeffe81173b85700c55c396ab72068751ef809bea8e1cb1e6c7919f5a9
05","src/single_byte.rs":"383d325dedbf3295acd50d880db1cecc29b69efe332ae2a37367cf40bf138ac4","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","
src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"b299d27055f3b068de66cc10a75c024b881c48bc093627c01e0b1f8bd7d94666","src/utf_16.rs":"1ec4e1c8ed7e42e4de401c6d0f64c2835bd80c2a306f358959957d30e6ff1501","src/utf_8.rs":"f639fc5dccd5dcc2458936baa942237d0fd58ac398c83ea3f48e51dceb5b6a81","src/variant.rs":"619a8e604d2febe6a874e3ad73cddf3ef9e6011480aecf86f23708b313415251","src/x_user_defined.rs":"ab26ea900c8f7b7a4d1172872b7ca4bc573bc60b7b1979c93aafdfb86b2c2235"},"package":"a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7"} +\ No newline at end of file 
++{"files":{"CONTRIBUTING.md":"06c26277e8dbd3f57be2eb51b5e3285dc1cbbf8c11326df413868ae702e6a61c","COPYRIGHT":"8b98376eb373dcf81950474efe34b5576a8171460dff500cc58a1ed8d160cd57","Cargo.toml":"fd56e8d662553f0cc559f8ef7097effefbc815ac3485799b37dee9df08ec803c","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"f2ad48641d9c997d9ae3b95d93d1cd6e1ab12ab4c44de89937c7bfabbd076a4a","README.md":"8ae2a3548dee23c19e20564a90e2fd0dfa600cf4c2dfcc538f3455f4462d7133","build.rs":"82747097b0bb8999cdaf689a9e46195f6df5d691ee90bcde8a7b79f16bd976f0","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd
77776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic
.txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"92ddec35a834b6bc815fffffe6d07d9938a90d3c4526298637d8624410d83078","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"800cfbe3036d0c97ce27e07a4fd05edbcb7354ebec20903d81c76136d734931c","src/big5.rs":"1c94b35813314775c3fa1b10923cf8e8f8eba8c465d9833ad4293594e16c17f2","src/data.rs":"9544c019c7360a669bd3adaa90b70331124abd1df59841db66e74912bcdb96a5","src/euc_jp.rs":"0842e4f564a36051c6b85c47bbb652efae2f2926e91491daf77e4ceeecb18163","src/euc_kr.rs":"8e68590efa65485583bf57cae44ebf6de535bac1d37232e7f0307a38425fb992","src/gb18030.rs":"d269efb5e5d175f9d2ecf01d5606955a284b6f00749bb0ee23d3412c83aa3d59","src/handles.rs":"0646bd091892ff7a76f34efccda4e5ddabe1e624e890baa9fdc9d48011d2d38b","src/iso_2022_jp.rs":"3adc380736f24a5de36bc1cf81049bbe64473de10e6f12774195e6213c27c322","src/lib.rs":"e2917fb9f605662ec4705d8c0b3c179f2264697a761191c3ec8101748cf717dc","src/macros.rs":"c7a019fd81d31de77569036ac36fd4e404b3f20144bbf79747faf4ea21538d09","src/mem.rs":"5498de31e816f51348b8d298d4fc9568da6b0b9363146f87ca5503131d33397f","src/replacement.rs":"182c2093a6edb162183ca5990554fd7b199d3011924a8d80d894ba98ee7c479e","src/shift_jis.rs":"1c0c69ba6c123fcf720276646074660193bf9e6fa4327fe0d739a3e67874e081","src/simd_funcs.rs":"857e61c1bda9d65286c23a6c3910d6814680bbc3064bf0ff92de5bc4f3edb6f3","src/single_byte.rs":"383d325dedbf3295acd50d880db1cecc29b69efe332ae2a37367cf40bf138ac4","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_
data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab
00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"b299d27055f3b068de66cc10a75c024b881c48bc093627c01e0b1f8bd7d94666","src/utf_16.rs":"1ec4e1c8ed7e42e4de401c6d0f64c2835bd80c2a306f358959957d30e6ff1501","src/utf_8.rs":"f639fc5dccd5dcc2458936baa942237d0fd58ac398c83ea3f48e51dceb5b6a81","src/variant.rs":"619a8e604d2febe6a874e3ad73cddf3ef9e6011480aecf86f23708b313415251","src/x_user_defined.rs":"da51def859b870ced29cb87987f02d27b220eac0f222876cb72a1dc616f9d8ec"},"package":"0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73"} +\ No newline at end of file +diff --git a/third_party/rust/encoding_rs/Cargo.toml b/third_party/rust/encoding_rs/Cargo.toml +index 65fc8e8dffcd..e29f19fb9afe 100644 +--- a/third_party/rust/encoding_rs/Cargo.toml ++++ b/third_party/rust/encoding_rs/Cargo.toml +@@ -12,47 +12,47 @@ + + [package] + name = "encoding_rs" +-version = "0.8.14" ++version = "0.8.16" + authors = ["Henri Sivonen "] + description = "A Gecko-oriented implementation of the Encoding Standard" + homepage = "https://docs.rs/encoding_rs/" + documentation = "https://docs.rs/encoding_rs/" + readme = "README.md" + keywords = ["encoding", "web", "unicode", "charset"] + categories = ["text-processing", "encoding", "web-programming", "internationalization"] + license = "MIT/Apache-2.0" + repository = "https://github.com/hsivonen/encoding_rs" + [profile.release] + lto = true + [dependencies.cfg-if] + version = "0.1.0" + +-[dependencies.serde] +-version = "1.0" ++[dependencies.packed_simd] ++version = "0.3.3" + optional = true + +-[dependencies.simd] +-version = "0.2.3" ++[dependencies.serde] ++version = "1.0" + optional = true + [dev-dependencies.bincode] + version = "0.8" + + 
[dev-dependencies.serde_derive] + version = "1.0" + + [dev-dependencies.serde_json] + version = "1.0" + + [features] + fast-big5-hanzi-encode = [] + fast-gb-hanzi-encode = [] + fast-hangul-encode = [] + fast-hanja-encode = [] + fast-kanji-encode = [] + fast-legacy-encode = ["fast-hangul-encode", "fast-hanja-encode", "fast-kanji-encode", "fast-gb-hanzi-encode", "fast-big5-hanzi-encode"] + less-slow-big5-hanzi-encode = [] + less-slow-gb-hanzi-encode = [] + less-slow-kanji-encode = [] +-simd-accel = ["simd"] ++simd-accel = ["packed_simd", "packed_simd/into_bits"] + [badges.travis-ci] + repository = "hsivonen/encoding_rs" +diff --git a/third_party/rust/encoding_rs/README.md b/third_party/rust/encoding_rs/README.md +index 3446efd0bb43..8a72b515450e 100644 +--- a/third_party/rust/encoding_rs/README.md ++++ b/third_party/rust/encoding_rs/README.md +@@ -126,17 +126,39 @@ There are currently these optional cargo features: + + ### `simd-accel` + +-Enables SSE2 acceleration on x86 and x86_64 and NEON acceleration on Aarch64 +-and ARMv7. _Enabling this cargo feature is recommended when building for x86, +-x86_64, ARMv7 or Aarch64._ The intention is for the functionality enabled by +-this feature to become the normal on-by-default behavior once +-[portable SIMD](https://github.com/rust-lang/rfcs/pull/2366) becames part of +-stable Rust. ++Enables SIMD acceleration using the nightly-dependent `packed_simd` crate. + +-Enabling this feature breaks the build unless the target is x86 with SSE2 +-(Rust's default 32-bit x86 target, `i686`, has SSE2, but Linux distros may +-use an x86 target without SSE2, i.e. `i586` in `rustup` terms), ARMv7 or +-thumbv7 with NEON (`-C target_feature=+neon`), x86_64 or Aarch64. ++This is an opt-in feature, because enabling this feature _opts out_ of Rust's ++guarantees of future compilers compiling old code (aka. "stability story"). 
++ ++Currently, this has not been tested to be an improvement except for these ++targets: ++ ++* x86_64 ++* i686 ++* aarch64 ++* thumbv7neon ++ ++If you use nightly Rust, you use targets whose first component is one of the ++above, and you are prepared _to have to revise your configuration when updating ++Rust_, you should enable this feature. Otherwise, please _do not_ enable this ++feature. ++ ++_Note!_ If you are compiling for a target that does not have 128-bit SIMD ++enabled as part of the target definition and you are enabling 128-bit SIMD ++using `-C target_feature`, you need to enable the `core_arch` Cargo feature ++for `packed_simd` to compile a crates.io snapshot of `core_arch` instead of ++using the standard-library copy of `core::arch`, because the `core::arch` ++module of the pre-compiled standard library has been compiled with the ++assumption that the CPU doesn't have 128-bit SIMD. At present this applies ++mainly to 32-bit ARM targets whose first component does not include the ++substring `neon`. ++ ++The encoding_rs side of things has not been properly set up for POWER, ++PowerPC, MIPS, etc., SIMD at this time, so even if you were to follow ++the advice from the previous paragraph, you probably shouldn't use ++the `simd-accel` option on the less mainstream architectures at this ++time. + + Used by Firefox. + +@@ -382,6 +404,14 @@ To regenerate the generated code: + + ## Release Notes + ++### 0.8.16 ++ ++* Switch from the `simd` crate to `packed_simd`. ++ ++### 0.8.15 ++ ++* Adjust documentation for `simd-accel` (README-only release). ++ + ### 0.8.14 + + * Made UTF-16 to UTF-8 encode conversion fill the output buffer as +diff --git a/third_party/rust/encoding_rs/build.rs b/third_party/rust/encoding_rs/build.rs +index 1b7adf780010..e687878081f7 100644 +--- a/third_party/rust/encoding_rs/build.rs ++++ b/third_party/rust/encoding_rs/build.rs +@@ -1,4 +1,12 @@ + fn main() { ++ // This does not enable `RUSTC_BOOTSTRAP=1` for `packed_simd`. 
++ // You still need to knowingly have a setup that makes ++ // `packed_simd` compile. Therefore, having this file on ++ // crates.io is harmless in terms of users of `encoding_rs` ++ // accidentally depending on nightly features. Having this ++ // here means that if you knowingly want this, you only ++ // need to maintain a fork of `packed_simd` without _also_ ++ // having to maintain a fork of `encoding_rs`. + #[cfg(feature = "simd-accel")] + println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1"); + } +diff --git a/third_party/rust/encoding_rs/src/handles.rs b/third_party/rust/encoding_rs/src/handles.rs +index d75b65d75ce3..08da62d20051 100644 +--- a/third_party/rust/encoding_rs/src/handles.rs ++++ b/third_party/rust/encoding_rs/src/handles.rs +@@ -34,7 +34,7 @@ use simd_funcs::*; + all(target_endian = "little", target_feature = "neon") + ) + ))] +-use simd::u16x8; ++use packed_simd::u16x8; + + use super::DecoderResult; + use super::EncoderResult; +diff --git a/third_party/rust/encoding_rs/src/lib.rs b/third_party/rust/encoding_rs/src/lib.rs +index 912c349a0e25..23069375d6f8 100644 +--- a/third_party/rust/encoding_rs/src/lib.rs ++++ b/third_party/rust/encoding_rs/src/lib.rs +@@ -11,7 +11,7 @@ + feature = "cargo-clippy", + allow(doc_markdown, inline_always, new_ret_no_self) + )] +-#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.14")] ++#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.16")] + + //! encoding_rs is a Gecko-oriented Free Software / Open Source implementation + //! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust. +@@ -665,20 +665,21 @@ + //! See the section [_UTF-16LE, UTF-16BE and Unicode Encoding Schemes_](#utf-16le-utf-16be-and-unicode-encoding-schemes) + //! for discussion about the UTF-16 family. 
+ +-#![cfg_attr(feature = "simd-accel", feature(platform_intrinsics, core_intrinsics))] ++#![cfg_attr(feature = "simd-accel", feature(stdsimd, core_intrinsics))] + + #[macro_use] + extern crate cfg_if; + + #[cfg(all( + feature = "simd-accel", + any( + target_feature = "sse2", + all(target_endian = "little", target_arch = "aarch64"), + all(target_endian = "little", target_feature = "neon") + ) + ))] +-extern crate simd; ++#[macro_use(shuffle)] ++extern crate packed_simd; + + #[cfg(feature = "serde")] + extern crate serde; +diff --git a/third_party/rust/encoding_rs/src/mem.rs b/third_party/rust/encoding_rs/src/mem.rs +index 6cd1a4448056..c5ee605c1b13 100644 +--- a/third_party/rust/encoding_rs/src/mem.rs ++++ b/third_party/rust/encoding_rs/src/mem.rs +@@ -228,8 +228,8 @@ macro_rules! by_unit_check_simd { + cfg_if! { + if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] { + use simd_funcs::*; +- use simd::u8x16; +- use simd::u16x8; ++ use packed_simd::u8x16; ++ use packed_simd::u16x8; + + const SIMD_ALIGNMENT: usize = 16; + +@@ -631,47 +631,42 @@ cfg_if! { + /// + /// May read the entire buffer even if it isn't all-ASCII. (I.e. the function + /// is not guaranteed to fail fast.) +-#[inline] + pub fn is_ascii(buffer: &[u8]) -> bool { + is_ascii_impl(buffer) + } + + /// Checks whether the buffer is all-Basic Latin (i.e. UTF-16 representing + /// only ASCII characters). + /// + /// May read the entire buffer even if it isn't all-ASCII. (I.e. the function + /// is not guaranteed to fail fast.) +-#[inline] + pub fn is_basic_latin(buffer: &[u16]) -> bool { + is_basic_latin_impl(buffer) + } + + /// Checks whether the buffer is valid UTF-8 representing only code points + /// less than or equal to U+00FF. + /// + /// Fails fast. (I.e. returns before having read the whole buffer if UTF-8 + /// invalidity or code points above U+00FF are discovered. 
+-#[inline] + pub fn is_utf8_latin1(buffer: &[u8]) -> bool { + is_utf8_latin1_impl(buffer).is_none() + } + + /// Checks whether the buffer represents only code point less than or equal + /// to U+00FF. + /// + /// Fails fast. (I.e. returns before having read the whole buffer if code + /// points above U+00FF are discovered. +-#[inline] + pub fn is_str_latin1(buffer: &str) -> bool { + is_str_latin1_impl(buffer).is_none() + } + + /// Checks whether the buffer represents only code point less than or equal + /// to U+00FF. + /// + /// May read the entire buffer even if it isn't all-Latin1. (I.e. the function + /// is not guaranteed to fail fast.) +-#[inline] + pub fn is_utf16_latin1(buffer: &[u16]) -> bool { + is_utf16_latin1_impl(buffer) + } +@@ -1283,7 +1278,6 @@ pub fn is_str_bidi(buffer: &str) -> bool { + /// high surrogate that could be the high half of an RTL character. + /// Returns `false` if the input contains neither RTL characters nor + /// unpaired high surrogates that could be higher halves of RTL characters. +-#[inline] + pub fn is_utf16_bidi(buffer: &[u16]) -> bool { + is_utf16_bidi_impl(buffer) + } +@@ -1416,67 +1410,63 @@ pub fn is_utf16_code_unit_bidi(u: u16) -> bool { + /// Returns `Latin1Bidi::Latin1` if `is_utf8_latin1()` would return `true`. + /// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf8_bidi()` would return + /// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`. +-#[inline] + pub fn check_utf8_for_latin1_and_bidi(buffer: &[u8]) -> Latin1Bidi { + if let Some(offset) = is_utf8_latin1_impl(buffer) { + if is_utf8_bidi(&buffer[offset..]) { + Latin1Bidi::Bidi + } else { + Latin1Bidi::LeftToRight + } + } else { + Latin1Bidi::Latin1 + } + } + + /// Checks whether a valid UTF-8 buffer contains code points + /// that trigger right-to-left processing or is all-Latin1. + /// + /// Possibly more efficient than performing the checks separately. + /// + /// Returns `Latin1Bidi::Latin1` if `is_str_latin1()` would return `true`. 
+ /// Otherwise, returns `Latin1Bidi::Bidi` if `is_str_bidi()` would return + /// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`. +-#[inline] + pub fn check_str_for_latin1_and_bidi(buffer: &str) -> Latin1Bidi { + // The transition from the latin1 check to the bidi check isn't + // optimal but not tweaking it to perfection today. + if let Some(offset) = is_str_latin1_impl(buffer) { + if is_str_bidi(&buffer[offset..]) { + Latin1Bidi::Bidi + } else { + Latin1Bidi::LeftToRight + } + } else { + Latin1Bidi::Latin1 + } + } + + /// Checks whether a potentially invalid UTF-16 buffer contains code points + /// that trigger right-to-left processing or is all-Latin1. + /// + /// Possibly more efficient than performing the checks separately. + /// + /// Returns `Latin1Bidi::Latin1` if `is_utf16_latin1()` would return `true`. + /// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf16_bidi()` would return + /// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`. +-#[inline] + pub fn check_utf16_for_latin1_and_bidi(buffer: &[u16]) -> Latin1Bidi { + check_utf16_for_latin1_and_bidi_impl(buffer) + } + + /// Converts potentially-invalid UTF-8 to valid UTF-16 with errors replaced + /// with the REPLACEMENT CHARACTER. + /// + /// The length of the destination buffer must be at least the length of the + /// source buffer _plus one_. + /// + /// Returns the number of `u16`s written. + /// + /// # Panics + /// + /// Panics if the destination buffer is shorter than stated above. +-#[inline] + pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize { + // TODO: Can the requirement for dst to be at least one unit longer + // be eliminated? +@@ -1516,7 +1506,6 @@ pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize { + /// # Panics + /// + /// Panics if the destination buffer is shorter than stated above. 
+-#[inline] + pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize { + assert!( + dst.len() >= src.len(), +@@ -1683,7 +1672,6 @@ pub fn convert_utf16_to_utf8(src: &[u16], dst: &mut [u8]) -> usize { + /// not allocating memory for the worst case up front. Specifically, + /// if the input starts with or ends with an unpaired surrogate, those are + /// replaced with the REPLACEMENT CHARACTER. +-#[inline] + pub fn convert_utf16_to_str_partial(src: &[u16], dst: &mut str) -> (usize, usize) { + let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() }; + let (read, written) = convert_utf16_to_utf8_partial(src, bytes); +@@ -1727,7 +1715,6 @@ pub fn convert_utf16_to_str(src: &[u16], dst: &mut str) -> usize { + /// # Panics + /// + /// Panics if the destination buffer is shorter than stated above. +-#[inline] + pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) { + assert!( + dst.len() >= src.len(), +@@ -1755,7 +1742,6 @@ pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) { + /// indicated by the return value, so using a `&mut str` interpreted as + /// `&mut [u8]` as the destination is not safe. If you want to convert into + /// a `&mut str`, use `convert_utf16_to_str()` instead of this function. +-#[inline] + pub fn convert_latin1_to_utf8_partial(src: &[u8], dst: &mut [u8]) -> (usize, usize) { + let src_len = src.len(); + let src_ptr = src.as_ptr(); +@@ -1894,7 +1880,6 @@ pub fn convert_latin1_to_str(src: &[u8], dst: &mut str) -> usize { + /// + /// If debug assertions are enabled (and not fuzzing) and the input is + /// not in the range U+0000 to U+00FF, inclusive. +-#[inline] + pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize { + assert!( + dst.len() >= src.len(), +@@ -1957,7 +1942,6 @@ pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize { + /// + /// (Probably in future versions if debug assertions are enabled (and not + /// fuzzing) and the input is not in the range U+0000 to U+00FF, inclusive.) 
+-#[inline] + pub fn convert_utf16_to_latin1_lossy(src: &[u16], dst: &mut [u8]) { + assert!( + dst.len() >= src.len(), +@@ -2030,7 +2014,6 @@ pub fn encode_latin1_lossy<'a>(string: &'a str) -> Cow<'a, [u8]> { + + /// Returns the index of the first unpaired surrogate or, if the input is + /// valid UTF-16 in its entirety, the length of the input. +-#[inline] + pub fn utf16_valid_up_to(buffer: &[u16]) -> usize { + utf16_valid_up_to_impl(buffer) + } +@@ -2060,61 +2043,58 @@ pub fn ensure_utf16_validity(buffer: &mut [u16]) { + /// # Panics + /// + /// Panics if the destination buffer is shorter than stated above. +-#[inline] + pub fn copy_ascii_to_ascii(src: &[u8], dst: &mut [u8]) -> usize { + assert!( + dst.len() >= src.len(), + "Destination must not be shorter than the source." + ); + if let Some((_, consumed)) = + unsafe { ascii_to_ascii(src.as_ptr(), dst.as_mut_ptr(), src.len()) } + { + consumed + } else { + src.len() + } + } + + /// Copies ASCII from source to destination zero-extending it to UTF-16 up to + /// the first non-ASCII byte (or the end of the input if it is ASCII in its + /// entirety). + /// + /// The length of the destination buffer must be at least the length of the + /// source buffer. + /// + /// Returns the number of `u16`s written. + /// + /// # Panics + /// + /// Panics if the destination buffer is shorter than stated above. +-#[inline] + pub fn copy_ascii_to_basic_latin(src: &[u8], dst: &mut [u16]) -> usize { + assert!( + dst.len() >= src.len(), + "Destination must not be shorter than the source." + ); + if let Some((_, consumed)) = + unsafe { ascii_to_basic_latin(src.as_ptr(), dst.as_mut_ptr(), src.len()) } + { + consumed + } else { + src.len() + } + } + + /// Copies Basic Latin from source to destination narrowing it to ASCII up to + /// the first non-Basic Latin code unit (or the end of the input if it is + /// Basic Latin in its entirety). 
+ /// + /// The length of the destination buffer must be at least the length of the + /// source buffer. + /// + /// Returns the number of bytes written. + /// + /// # Panics + /// + /// Panics if the destination buffer is shorter than stated above. +-#[inline] + pub fn copy_basic_latin_to_ascii(src: &[u16], dst: &mut [u8]) -> usize { + assert!( + dst.len() >= src.len(), +diff --git a/third_party/rust/encoding_rs/src/simd_funcs.rs b/third_party/rust/encoding_rs/src/simd_funcs.rs +index 0cc05baf784d..4e19b0e8a07e 100644 +--- a/third_party/rust/encoding_rs/src/simd_funcs.rs ++++ b/third_party/rust/encoding_rs/src/simd_funcs.rs +@@ -7,9 +7,9 @@ + // option. This file may not be copied, modified, or distributed + // except according to those terms. + +-use simd::u16x8; +-use simd::u8x16; +-use simd::Simd; ++use packed_simd::u16x8; ++use packed_simd::u8x16; ++use packed_simd::FromBits; + + // TODO: Migrate unaligned access to stdlib code if/when the RFC + // https://github.com/rust-lang/rfcs/pull/1725 is implemented. +@@ -62,81 +62,79 @@ pub unsafe fn store8_aligned(ptr: *mut u16, s: u16x8) { + *(ptr as *mut u16x8) = s; + } + +-extern "platform-intrinsic" { +- fn simd_shuffle16>(x: T, y: T, idx: [u32; 16]) -> U; ++cfg_if! 
{ ++ if #[cfg(all(target_feature = "sse2", target_arch = "x86_64"))] { ++ use std::arch::x86_64::__m128i; ++ use std::arch::x86_64::_mm_movemask_epi8; ++ use std::arch::x86_64::_mm_packus_epi16; ++ } else if #[cfg(all(target_feature = "sse2", target_arch = "x86"))] { ++ use std::arch::x86::__m128i; ++ use std::arch::x86::_mm_movemask_epi8; ++ use std::arch::x86::_mm_packus_epi16; ++ } else if #[cfg(target_arch = "aarch64")]{ ++ use std::arch::aarch64::uint8x16_t; ++ use std::arch::aarch64::uint16x8_t; ++ use std::arch::aarch64::vmaxvq_u8; ++ use std::arch::aarch64::vmaxvq_u16; ++ } else { ++ ++ } + } + + // #[inline(always)] + // fn simd_byte_swap_u8(s: u8x16) -> u8x16 { + // unsafe { +-// simd_shuffle16(s, s, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) ++// shuffle!(s, s, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) + // } + // } + + // #[inline(always)] + // pub fn simd_byte_swap(s: u16x8) -> u16x8 { + // to_u16_lanes(simd_byte_swap_u8(to_u8_lanes(s))) + // } + + #[inline(always)] + pub fn simd_byte_swap(s: u16x8) -> u16x8 { + let left = s << 8; + let right = s >> 8; + left | right + } + + #[inline(always)] + pub fn to_u16_lanes(s: u8x16) -> u16x8 { +- unsafe { ::std::mem::transmute(s) } ++ u16x8::from_bits(s) + } + +-// #[inline(always)] +-// pub fn to_u8_lanes(s: u16x8) -> u8x16 { +-// unsafe { ::std::mem::transmute(s) } +-// } +- + cfg_if! { + if #[cfg(target_feature = "sse2")] { + +- use simd::i16x8; +- use simd::i8x16; +- extern "platform-intrinsic" { +- fn x86_mm_movemask_epi8(x: i8x16) -> i32; +- } +- + // Expose low-level mask instead of higher-level conclusion, + // because the non-ASCII case would perform less well otherwise. + #[inline(always)] + pub fn mask_ascii(s: u8x16) -> i32 { + unsafe { +- let signed: i8x16 = ::std::mem::transmute_copy(&s); +- x86_mm_movemask_epi8(signed) ++ _mm_movemask_epi8(__m128i::from_bits(s)) + } + } + + } else { + + } + } + + cfg_if! 
{ + if #[cfg(target_feature = "sse2")] { + #[inline(always)] + pub fn simd_is_ascii(s: u8x16) -> bool { + unsafe { +- let signed: i8x16 = ::std::mem::transmute_copy(&s); +- x86_mm_movemask_epi8(signed) == 0 ++ _mm_movemask_epi8(__m128i::from_bits(s)) == 0 + } + } + } else if #[cfg(target_arch = "aarch64")]{ +- extern "platform-intrinsic" { +- fn aarch64_vmaxvq_u8(x: u8x16) -> u8; +- } +- + #[inline(always)] + pub fn simd_is_ascii(s: u8x16) -> bool { + unsafe { +- aarch64_vmaxvq_u8(s) < 0x80 ++ vmaxvq_u8(uint8x16_t::from_bits(s)) < 0x80 + } + } + } else { +@@ -164,35 +162,31 @@ cfg_if! { + #[inline(always)] + pub fn simd_is_str_latin1(s: u8x16) -> bool { + unsafe { +- aarch64_vmaxvq_u8(s) < 0xC4 ++ vmaxvq_u8(uint8x16_t::from_bits(s)) < 0xC4 + } + } + } else { + #[inline(always)] + pub fn simd_is_str_latin1(s: u8x16) -> bool { + let above_str_latin1 = u8x16::splat(0xC4); + s.lt(above_str_latin1).all() + } + } + } + + cfg_if! { + if #[cfg(target_arch = "aarch64")]{ +- extern "platform-intrinsic" { +- fn aarch64_vmaxvq_u16(x: u16x8) -> u16; +- } +- + #[inline(always)] + pub fn simd_is_basic_latin(s: u16x8) -> bool { + unsafe { +- aarch64_vmaxvq_u16(s) < 0x80 ++ vmaxvq_u16(uint16x8_t::from_bits(s)) < 0x80 + } + } + + #[inline(always)] + pub fn simd_is_latin1(s: u16x8) -> bool { + unsafe { +- aarch64_vmaxvq_u16(s) < 0x100 ++ vmaxvq_u16(uint16x8_t::from_bits(s)) < 0x100 + } + } + } else { +@@ -225,7 +219,7 @@ cfg_if! { + macro_rules! 
aarch64_return_false_if_below_hebrew { + ($s:ident) => ({ + unsafe { +- if aarch64_vmaxvq_u16($s) < 0x0590 { ++ if vmaxvq_u16(uint16x8_t::from_bits($s)) < 0x0590 { + return false; + } + } +@@ -292,47 +286,38 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool { + #[inline(always)] + pub fn simd_unpack(s: u8x16) -> (u16x8, u16x8) { + unsafe { +- let first: u8x16 = simd_shuffle16( ++ let first: u8x16 = shuffle!( + s, + u8x16::splat(0), +- [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], ++ [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); +- let second: u8x16 = simd_shuffle16( ++ let second: u8x16 = shuffle!( + s, + u8x16::splat(0), +- [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], ++ [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); +- ( +- ::std::mem::transmute_copy(&first), +- ::std::mem::transmute_copy(&second), +- ) ++ (u16x8::from_bits(first), u16x8::from_bits(second)) + } + } + + cfg_if! { + if #[cfg(target_feature = "sse2")] { +- extern "platform-intrinsic" { +- fn x86_mm_packus_epi16(x: i16x8, y: i16x8) -> u8x16; +- } +- + #[inline(always)] + pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 { + unsafe { +- let first: i16x8 = ::std::mem::transmute_copy(&a); +- let second: i16x8 = ::std::mem::transmute_copy(&b); +- x86_mm_packus_epi16(first, second) ++ u8x16::from_bits(_mm_packus_epi16(__m128i::from_bits(a), __m128i::from_bits(b))) + } + } + } else { + #[inline(always)] + pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 { + unsafe { +- let first: u8x16 = ::std::mem::transmute_copy(&a); +- let second: u8x16 = ::std::mem::transmute_copy(&b); +- simd_shuffle16( ++ let first = u8x16::from_bits(a); ++ let second = u8x16::from_bits(b); ++ shuffle!( + first, + second, +- [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30], ++ [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ) + } + } +diff --git a/third_party/rust/encoding_rs/src/x_user_defined.rs b/third_party/rust/encoding_rs/src/x_user_defined.rs +index 
6d0d613fa093..2d2076987984 100644 +--- a/third_party/rust/encoding_rs/src/x_user_defined.rs ++++ b/third_party/rust/encoding_rs/src/x_user_defined.rs +@@ -14,15 +14,12 @@ use variant::*; + cfg_if! { + if #[cfg(feature = "simd-accel")] { + use simd_funcs::*; +- use simd::u16x8; ++ use packed_simd::u16x8; + + #[inline(always)] + fn shift_upper(unpacked: u16x8) -> u16x8 { + let highest_ascii = u16x8::splat(0x7F); +- let offset = u16x8::splat(0xF700); +- let mask = unpacked.gt(highest_ascii).to_repr().to_u16(); +- unpacked + (offset & mask) +- } ++ unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) } + } else { + } + } +diff --git a/third_party/rust/packed_simd/.appveyor.yml b/third_party/rust/packed_simd/.appveyor.yml +new file mode 100644 +index 000000000000..0388cee0a07b +--- /dev/null ++++ b/third_party/rust/packed_simd/.appveyor.yml +@@ -0,0 +1,59 @@ ++matrix: ++ allow_failures: ++ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/72 ++ - TARGET: i686-pc-windows-msvc ++ - TARGET: i686-pc-windows-gnu ++ - TARGET: x86_64-pc-windows-gnu ++ fast_finish: true ++ ++environment: ++ matrix: ++ - TARGET: x86_64-pc-windows-msvc ++ MSYSTEM: MINGW64 ++ NOVERIFY: "1" ++ - TARGET: x86_64-pc-windows-msvc ++ MSYSTEM: MINGW64 ++ RUSTFLAGS: "-C target-feature=+sse4.2" ++ NOVERIFY: "1" ++ - TARGET: x86_64-pc-windows-msvc ++ MSYSTEM: MINGW64 ++ RUSTFLAGS: "-C target-feature=+avx" ++ NOVERIFY: "1" ++ - TARGET: x86_64-pc-windows-msvc ++ MSYSTEM: MINGW64 ++ RUSTFLAGS: "-C target-feature=+avx2" ++ NOVERIFY: "1" ++ ++ - TARGET: i686-pc-windows-msvc ++ MSYSTEM: MINGW32 ++ NOVERIFY: "1" ++ - TARGET: i686-pc-windows-msvc ++ MSYSTEM: MINGW32 ++ RUSTFLAGS: "-C target-feature=+sse4.2" ++ NOVERIFY: "1" ++ - TARGET: i686-pc-windows-msvc ++ MSYSTEM: MINGW32 ++ RUSTFLAGS: "-C target-feature=+avx" ++ NOVERIFY: "1" ++ - TARGET: i686-pc-windows-msvc ++ MSYSTEM: MINGW32 ++ RUSTFLAGS: "-C target-feature=+avx2" ++ NOVERIFY: "1" ++ ++ - TARGET: 
x86_64-pc-windows-gnu ++ MSYSTEM: MINGW64 ++ ++ - TARGET: i686-pc-windows-gnu ++ MSYSTEM: MINGW32 ++ - TARGET: x86_64-pc-windows-gnu ++ MSYSTEM: MINGW64 ++install: ++ - ps: if (ls -r . -fi "*.rs" | sls "`t") { throw "Found tab character" } ++ - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-nightly-${env:TARGET}.exe" -FileName "rust-install.exe" ++ - ps: .\rust-install.exe /VERYSILENT /NORESTART /DIR="C:\rust" | Out-Null ++ - ps: $env:PATH="$env:PATH;C:\rust\bin" ++ - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% ++ - rustc -vV ++ - cargo -vV ++build: false ++test_script: bash -c "ci/run.sh" +diff --git a/third_party/rust/packed_simd/.cargo-checksum.json b/third_party/rust/packed_simd/.cargo-checksum.json +new file mode 100644 +index 000000000000..01afcc1efdac +--- /dev/null ++++ b/third_party/rust/packed_simd/.cargo-checksum.json +@@ -0,0 +1 @@ ++{"files":{".appveyor.yml":"f1ed01850e0d725f9498f52a1a63ddf40702ad6e0bf5b2d7c4c04d76e96794a3",".travis.yml":"e9258d9a54fdaf4cbc12405fe5993ac4497eb2b29021691dbc91b19cb9b52227","Cargo.toml":"089941ba3c89ea111cbea3cc3abdcdcf2b9d0ae0db268d7269ee38226db950e5","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","bors.toml":"dee881dc69b9b7834e4eba5d95c3ed5a416d4628815a167d6a22d4cb4fb064b8","build.rs":"f3baefc5e5bb9b250e762a1466371b922fd7ee4243c217b2d014307603c2f57a","ci/all.sh":"a23d14e10cb26a0eb719e389c30eb955fa53cddcd436890646df09af640bd2eb","ci/android-install-ndk.sh":"0f1746108cc30bf9b9ba45bcde7b19fc1a8bdf5b0258035b4eb8dc69b75efac4","ci/android-install-sdk.sh":"3490432022c5c8f5a115c084f7a9aca1626f96c0c87ffb62019228c4346b47e4","ci/android-sysimage.sh":"ebf4e5daa1f0fe1b2092b79f0f3f161c4c4275cb744e52352c4d81ab451e4c5a","ci/benchmark.sh":"b61d19ef6b90deba8fb79dee74c8b062d94844676293da346da87bb78a9a49a4","ci/deploy_and_run_on_ios_simulator.rs":"ec8ecf82d92072676aa47f0d1a3d021b60a7ae3531153ef12
d2ff4541fc294dc","ci/docker/aarch64-linux-android/Dockerfile":"ace2e7d33c87bc0f6d3962a4a3408c04557646f7f51ab99cfbf574906796b016","ci/docker/aarch64-unknown-linux-gnu/Dockerfile":"1ecdac757101d951794fb2ab0deaa278199cf25f2e08a15c7d40ff31a8556184","ci/docker/arm-linux-androideabi/Dockerfile":"370e55d3330a413a3ccf677b3afb3e0ef9018a5fab263faa97ae8ac017fc2286","ci/docker/arm-unknown-linux-gnueabi/Dockerfile":"e25d88f6c0c94aada3d2e3f08243f755feb7e869dc5dc505b3799719cb1af591","ci/docker/arm-unknown-linux-gnueabihf/Dockerfile":"f126f4c7bae8c11ab8b16df06ad997863f0838825a9c08c9899a3eedb6d570bd","ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile":"b647545c158ee480a4c581dbdc1f57833aef056c8d498acc04b573e842bf803c","ci/docker/i586-unknown-linux-gnu/Dockerfile":"0d492759017307ccf74dc2aa4a8cf6623daf3dc728c708dc2b18fa7940800cba","ci/docker/i686-unknown-linux-gnu/Dockerfile":"0d492759017307ccf74dc2aa4a8cf6623daf3dc728c708dc2b18fa7940800cba","ci/docker/mips-unknown-linux-gnu/Dockerfile":"323776469bb7b160385f3621d66e3ee14c75242f8180f916e65af048a29d4ea0","ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile":"c647f6948a9a43b0be695cbed4eac752120d0faf28e5e69c718cb10406921dab","ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile":"77bfd00cc8639509be381b394f077e39b45a00158ad61b4e1656714c714665d1","ci/docker/mipsel-unknown-linux-musl/Dockerfile":"ec5bea6c98a3b626731fdb95f9ff2d1182639c76e8fb16d3271d0fc884901524","ci/docker/powerpc-unknown-linux-gnu/Dockerfile":"4f2b662de66e83d1354f650b7077692309637f786c2ea5516c31b5c2ee10af2d","ci/docker/powerpc64-unknown-linux-gnu/Dockerfile":"a9595402b772bc365982e22a0096a8988825d90b09b5faa97ab192e76072f71d","ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile":"df3c381c157439695ae8cd10ab71664702c061e3b4ab22906a5ad6c2680acfed","ci/docker/s390x-unknown-linux-gnu/Dockerfile":"93fb44df3d7fd31ead158570667c97b5076a05c3d968af4a84bc13819a8f2db8","ci/docker/sparc64-unknown-linux-gnu/Dockerfile":"da1c39a3ff1fe22e41395fa7c8934e90b4c1788e551b9aec6e38bfd94effc437","ci/dock
er/thumbv7neon-linux-androideabi/Dockerfile":"c2decd5591bd7a09378901bef629cd944acf052eb55e4f35b79eb9cb4d62246a","ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile":"75c0c56161c7382b439de74c00de1c0e3dc9d59560cd6720976a751034b78714","ci/docker/wasm32-unknown-unknown/Dockerfile":"3e5f294bc1e004aa599086c2af49d6f3e7459fa250f5fbdd60cf67d53db78758","ci/docker/x86_64-linux-android/Dockerfile":"685040273cf350d5509e580ac451555efa19790c8723ca2af066adadc6880ad2","ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile":"44b6203d9290bfdc53d81219f0937e1110847a23dd982ec8c4de388354f01536","ci/docker/x86_64-unknown-linux-gnu/Dockerfile":"d253c86803b22da428fa9cc671a05f18d3318eca7733b8dccb4f7be1ddf524c5","ci/dox.sh":"5b61711be47a4e3dde0ddd15ba73d256ea95fd75af3897732c24db1dc7e66366","ci/linux-s390x.sh":"d6b732d7795b4ba131326aff893bca6228a7d2eb0e9402f135705413dbbe0dce","ci/linux-sparc64.sh":"c92966838b1ab7ad3b7a344833ee726aba6b647cf5952e56f0ad1ba420b13325","ci/lld-shim.rs":"3d7f71ec23a49e2b67f694a0168786f9a954dda15f5a138815d966643fd3fcc3","ci/max_line_width.sh":"0a1518bba4c9ecaa55694cb2e9930d0e19c265baabf73143f17f9cf285aaa5bb","ci/run-docker.sh":"92e036390ad9b0d16f109579df1b5ced2e72e9afea40c7d011400ebd3a2a90de","ci/run.sh":"63259e22a96ba539f53c06b1b39f53e3a78a71171652e7afc170836110ccd913","ci/run_examples.sh":"d1a23c6c35374a0678ba5114b9b8fefd8be0a79e774872a8bf0898d1baca18d0","ci/runtest-android.rs":"145a8e9799a5223975061fe7e586ade5669ee4877a7d7a4cf6b4ab48e8e36c7c","ci/setup_benchmarks.sh":"73fb981a8fdb1dcd54409d3c0fbbfb8f77a3ceabf8626a6b9bf9d21d6bc8ce72","ci/test-runner-linux":"c8aa6025cff5306f4f31d0c61dc5f9d4dd5a1d189ab613ef8d4c367c694d9ccd","contributing.md":"2cc8c9c560ae17867e69b06d09b758dbf7bc39eb774ada50a743724b10acc0a2","perf-guide/.gitignore":"fe82c7da551079d832cf74200b0b359b4df9828cb4a0416fa7384f07a2ae6a13","perf-guide/book.toml":"115a98284126c6b180178b44713314cc494f08a71662ee2ce15cf67f17a51064","perf-guide/src/SUMMARY.md":"3e03bffc991fdc2050f3d51842d72d9d21ea6abab56a3baf3
b2d5973a78b89e1","perf-guide/src/ascii.css":"29afb08833b2fe2250f0412e1fa1161a2432a0820a14953c87124407417c741a","perf-guide/src/bound_checks.md":"5e4991ff58a183ef0cd9fdc1feb4cd12d083b44bdf87393bbb0927808ef3ce7d","perf-guide/src/float-math/approx.md":"8c09032fa2d795a0c5db1775826c850d28eb2627846d0965c60ee72de63735ad","perf-guide/src/float-math/fma.md":"311076ba4b741d604a82e74b83a8d7e8c318fcbd7f64c4392d1cf5af95c60243","perf-guide/src/float-math/fp.md":"04153e775ab6e4f0d7837bcc515230d327b04edfa34c84ce9c9e10ebaeef2be8","perf-guide/src/float-math/svml.md":"0798873b8eedaeda5fed62dc91645b57c20775a02d3cd74d8bd06958f1516506","perf-guide/src/introduction.md":"9f5a19e9e6751f25d2daad39891a0cc600974527ec4c8305843f9618910671bd","perf-guide/src/prof/linux.md":"447731eb5de7d69166728fdbc5ecb0c0c9db678ea493b45a592d67dd002184c0","perf-guide/src/prof/mca.md":"f56d54f3d20e7aa4d32052186e8237b03d65971eb5d112802b442570ff11d344","perf-guide/src/prof/profiling.md":"8a650c0fd6ede0964789bb6577557eeef1d8226a896788602ce61528e260e43c","perf-guide/src/target-feature/attribute.md":"615f88dca0a707b6c416fa605435dd6e1fb5361cc639429cbf68cd87624bd78b","perf-guide/src/target-feature/features.md":"17077760ff24c006b606dd21889c53d87228f4311f3ba3a574f9afdeacd86165","perf-guide/src/target-feature/inlining.md":"7ed1d7068d8173a00d84c16cfe5871cd68b9f04f8d0cca2d01ebc84957ebf2f6","perf-guide/src/target-feature/practice.md":"c4b371842e0086df178488fec97f20def8f0c62ee588bcd25fd948b9b1fa227e","perf-guide/src/target-feature/runtime.md":"835425f5ee597fb3e51d36e725a81ebee29f4561231d19563cd4da81dbb1cfcb","perf-guide/src/target-feature/rustflags.md":"ab49712e9293a65d74d540ba4784fcb57ff1119ec05a575d895c071f1a620f64","perf-guide/src/vert-hor-ops.md":"c6211c0ee91e60552ec592d89d9d957eedc21dee3cbd89e1ad6765ea06a27471","readme.md":"585a8f0e16877fb9abb00cd17a175fcb9d7857840c6c61209f1827ffab095070","rustfmt.toml":"de6101d0670bad65fb3b337d56957d2a024e017e5ab146ec784d77312daaf8ff","src/api.rs":"331a3a4abb19cee2df5f2df4ad7c3e88b45e62cf
23fdacfc9bbaa633dc5cf788","src/api/bit_manip.rs":"e68290ee679cc5abc9c73afbe635c1035f8cbfe849e5c751a1680e459244c39e","src/api/cast.rs":"03b94a3d316ac7b7be7068810044911e965e889a0ace7bae762749ca74a92747","src/api/cast/macros.rs":"b0a14d0c83ad2ebb7a275180f6d9e3f2bc312ba57a7d3d6c39fad4e0f20f9408","src/api/cast/v128.rs":"63e28c6a3edf1a7a635f51b8d3c6adbb1d46f884d92a196b3d4a6e743d809416","src/api/cast/v16.rs":"2a584eeb57fd47baad6f3533764301b04aaaac23702b7a8db12598ac02899262","src/api/cast/v256.rs":"b91c15ed8d1536ecd97b4eb79ff9d5aba0552cd9b6f0ea6435b05f2273e23b3a","src/api/cast/v32.rs":"62ec89fcce7fa7f28497ee5770adc8f81d2d3a6b2925b02f7dc06504c40e8f38","src/api/cast/v512.rs":"d855cb943ae7106e9599ef38e30a3afb1c6bd5433178baca54cb128fd9a7d143","src/api/cast/v64.rs":"fe0f7dfaf4fc0c0c1a78c96fcfcdfdc2a1e2845843b11aa797a0c6fb52a8f774","src/api/cmp.rs":"357c3a2a09c6d4611c32dd7fa95be2fae933d513e229026ec9b44451a77b884e","src/api/cmp/eq.rs":"60f70f355bae4cb5b17db53204cacc3890f70670611c17df638d4c04f7cc8075","src/api/cmp/ord.rs":"589f7234761c294fa5df8f525bc4acd5a47cdb602207d524a0d4e19804cd9695","src/api/cmp/partial_eq.rs":"3ed23d2a930b0f9750c3a5309da766b03dc4f9c4d375b42ad3c50fe732693d15","src/api/cmp/partial_ord.rs":"e16b11805c94048acd058c93994b5bc74bb187f8d7e3b86a87df60e1601467f9","src/api/cmp/vertical.rs":"de3d62f38eba817299aa16f1e1939954c9a447e316509397465c2830852ba053","src/api/default.rs":"b61f92fc0e33a2633b3375eb405beba480da071cde03df4d437d8a6058afcd97","src/api/fmt.rs":"67fb804bb86b6cd77cf8cd492b5733ce437071b66fe3297278b8a6552c325dda","src/api/fmt/binary.rs":"35cb5c266197d6224d598fb3d286e5fe48ef0c01ed356c2ff6fe9ba946f96a92","src/api/fmt/debug.rs":"aa18eea443bf353fea3db8b1a025132bbcaf91e747ecfa43b8d9fce9af395a0c","src/api/fmt/lower_hex.rs":"69d5be366631af309f214e8031c8c20267fcc27a695eac6f45c6bc1df72a67e6","src/api/fmt/octal.rs":"9eb11ba3d990213f3c7f1ec25edba7ce997cb1320e16d308c83498ba6b9bfbd9","src/api/fmt/upper_hex.rs":"a4637d085b7bb20e759ce58e08435b510a563ba3dd468af2b03560fdc55115
62","src/api/from.rs":"2e599d8329cb05eaf06224cc441355c4b7b51254fc19256619333be8c149d444","src/api/from/from_array.rs":"4151593c7bba7455821fffa5b59867005a77c95d32f1f0cc3fd87294000157d9","src/api/from/from_vector.rs":"9764371aa9e6005aace74dea14f59e5611a095b7cf42707940924749282c52f0","src/api/hash.rs":"562cfa3f1d8eb9a733c035a3665a599c2f1e341ee820d8fbdd102a4398a441bc","src/api/into_bits.rs":"82297f0697d67b5a015e904e7e6e7b2a7066ba825bc54b94b4ff3e22d7a1eefb","src/api/into_bits/arch_specific.rs":"1f925390b0ce7132587d95f2419c6e2ad3e1a9d17eb1d9c120a1c1c4bdf4277e","src/api/into_bits/macros.rs":"d762406de25aedff88d460dec7a80dc8e825a2a419d53218ce007efa6a1d3e04","src/api/into_bits/v128.rs":"ecdc5893664c71d7ab1ff3697c3fbe490d20d8748b9b76881d05e7625e40d74c","src/api/into_bits/v16.rs":"5459ec7dad1ad7bd30dc7e48374580b993abf23701d9c3cb22203fa0a9aabb6d","src/api/into_bits/v256.rs":"90ea351da0380ead1bf0f63b620afd40d01d638d09f7e7be31840bd2c1d9c663","src/api/into_bits/v32.rs":"ee1dc5a430050e16f51154b5fe85b1536f5feddf2ea23dd1d3859b67c4afc6fc","src/api/into_bits/v512.rs":"f72098ed1c9a23944f3d01abaf5e0f2d0e81d35a06fdadd2183e896d41b59867","src/api/into_bits/v64.rs":"6394462facdfe7827349c742b7801f1291e75a720dfb8c0b52100df46f371c98","src/api/math.rs":"8b2a2fc651917a850539f993aa0b9e5bf4da67b11685285b8de8cdca311719ec","src/api/math/float.rs":"61d2794d68262a1090ae473bd30793b5f65cf732f32a6694a3af2ce5d9225616","src/api/math/float/abs.rs":"5b6b2701e2e11135b7ce58a05052ea8120e10e4702c95d046b9d21b827b26bf8","src/api/math/float/consts.rs":"78acba000d3fa527111300b6327c1932de9c4c1e02d4174e1a5615c01463d38c","src/api/math/float/cos.rs":"4c2dd7173728ef189314f1576c9486e03be21b7da98843b2f9011282a7979e31","src/api/math/float/exp.rs":"7c6d5f1e304f498a01cfa23b92380c815d7da0ad94eae3483783bc377d287eef","src/api/math/float/ln.rs":"54c7583f3df793b39ff57534fade27b41bb992439e5dc178252f5ca3190a3e54","src/api/math/float/mul_add.rs":"62cac77660d20159276d4c9ef066eb90c81cbddb808e8e157182c607625ad2eb","src/api/math/float/mul
_adde.rs":"bae056ee9f3a70df39ec3c3b2f6437c65303888a7b843ef1a5bcf1f5aca0e602","src/api/math/float/powf.rs":"9ddb938984b36d39d82a82f862f80df8f7fb013f1d222d45698d41d88472f568","src/api/math/float/recpre.rs":"589225794ff1dbf31158dff660e6d4509ecc8befbb57c633900dea5ac0b840d6","src/api/math/float/rsqrte.rs":"a32abdcc318d7ccc8448231f54d75b884b7cbeb03a7d595713ab6243036f4dbf","src/api/math/float/sin.rs":"cbd3622b7df74f19691743001c8cf747a201f8977ad90542fee915f37dcd1e49","src/api/math/float/sqrt.rs":"0c66d5d63fb08e4d99c6b82a8828e41173aff1ac9fa1a2764a11fac217ccf2ac","src/api/math/float/sqrte.rs":"731e1c9f321b662accdd27dacb3aac2e8043b7aecb2f2161dde733bd9f025362","src/api/minimal.rs":"1f22bcc528555444e76de569ec0ae2029b9ae9d04805efeafa93369c8098036b","src/api/minimal/iuf.rs":"c501a6696950cf5e521765f178de548af64fdfb6e10d026616d09fab93ca2d17","src/api/minimal/mask.rs":"42e415f536c5193d0218f5a754b34b87fd7c971bff068009f958712166ff056d","src/api/minimal/ptr.rs":"a9ee482d1dd1c956fb8f3f179e6e620b1de4e9d713961461d4c6923a4ef2e67c","src/api/ops.rs":"3e273b277a0f3019d42c3c59ca94a5afd4885d5ae6d2182e5089bbeec9de42ee","src/api/ops/scalar_arithmetic.rs":"d2d5ad897a59dd0787544f927e0e7ca4072c3e58b0f4a2324083312b0d5a21d7","src/api/ops/scalar_bitwise.rs":"482204e459ca6be79568e1c9f70adbe2d2151412ddf122fb2161be8ebb51c40c","src/api/ops/scalar_mask_bitwise.rs":"c250f52042e37b22d57256c80d4604104cfd2fbe2a2e127c676267270ca5d350","src/api/ops/scalar_shifts.rs":"987f8fdebeedc16e3d77c1b732e7826ef70633c541d16dfa290845d5c6289150","src/api/ops/vector_arithmetic.rs":"ddca15d09ddeef502c2ed66117a62300ca65d87e959e8b622d767bdf1c307910","src/api/ops/vector_bitwise.rs":"b3968f7005b649edcc22a54e2379b14d5ee19045f2e784029805781ae043b5ee","src/api/ops/vector_float_min_max.rs":"f5155dce75219f4ba11275b1f295d2fdcddd49d174a6f1fb2ace7ea42813ce41","src/api/ops/vector_int_min_max.rs":"a378789c6ff9b32a51fbd0a97ffd36ed102cd1fe6a067d2b02017c1df342def6","src/api/ops/vector_mask_bitwise.rs":"5052d18517d765415d40327e6e8e55a312daaca0a5e2
aec959bfa54b1675f9c8","src/api/ops/vector_neg.rs":"5c62f6b0221983cdbd23cd0a3af3672e6ba1255f0dfe8b19aae6fbd6503e231b","src/api/ops/vector_rotates.rs":"03cbe8a400fd7c688e4ee771a990a6754f2031b1a59b19ae81158b21471167e5","src/api/ops/vector_shifts.rs":"9bf69d0087268f61009e39aea52e03a90f378910206b6a28e8393178b6a5d0e0","src/api/ptr.rs":"8a793251bed6130dcfb2f1519ceaa18b751bbb15875928d0fb6deb5a5e07523a","src/api/ptr/gather_scatter.rs":"9ddd960365e050674b25b2fd3116e24d94669b4375d74e71c03e3f1469576066","src/api/reductions.rs":"ae5baca81352ecd44526d6c30c0a1feeda475ec73ddd3c3ec6b14e944e5448ee","src/api/reductions/bitwise.rs":"8bf910ae226188bd15fc7e125f058cd2566b6186fcd0cd8fd020f352c39ce139","src/api/reductions/float_arithmetic.rs":"e58c8c87806a95df2b2b5b48ac5991036df024096d9d7c171a480fe9282896a4","src/api/reductions/integer_arithmetic.rs":"47471da1c5f859489680bb5d34ced3d3aa20081c16053a3af121a4496fcb57bf","src/api/reductions/mask.rs":"db83327a950e33a317f37fd33ca4e20c347fb415975ec024f3e23da8509425af","src/api/reductions/min_max.rs":"f27be3aa28e1c1f46de7890198db6e12f00c207085e89ef2de7e57ee443cdb98","src/api/select.rs":"a98e2ccf9fc6bdeed32d337c8675bc96c2fbe2cc34fbf149ad6047fb8e749774","src/api/shuffle.rs":"da58200790868c09659819322a489929a5b6e56c596ed07e6a44293ea02e7d09","src/api/shuffle1_dyn.rs":"bfea5a91905b31444e9ef7ca6eddb7a9606b7e22d3f71bb842eb2795a0346620","src/api/slice.rs":"ee87484e8af329547b9a5d4f2a69e8bed6ea10bbd96270d706083843d4eea2ac","src/api/slice/from_slice.rs":"4d4fe8a329c885fcb4fbcbedf99efb15a95296fe6b3f595056cc37037450d5ac","src/api/slice/write_to_slice.rs":"f5b23b2c4b91cfb26b713a9013a6c0da7f45eaefb79ba06dcbc27f3f23bda679","src/api/swap_bytes.rs":"4a6792a2e49a77475e1b237592b4b2804dbddb79c474331acd0dd71b36934259","src/codegen.rs":"c6eebc3d3665420aa6a2f317977e3c41a4f43e0550ac630cdbe8e4bbed5e2031","src/codegen/bit_manip.rs":"5559e095105a80003e0de35af1d19b0c65c9ab04eb743c7e01c5442d882eb34e","src/codegen/llvm.rs":"d1299c189abb17a6133f047574cffc7a6db4c1be37cb7d4785491cb5
e8f8cf54","src/codegen/math.rs":"35f96e37a78fcf0cdb02146b7f27a45108fe06a37fc2a54d8851ce131a326178","src/codegen/math/float.rs":"dd86c0449e576c83b719700962ac017c332987fac08d91f2b7a2b1b883598170","src/codegen/math/float/abs.rs":"f56e2b4b8055ea861c1f5cbc6b6e1d8e7e5af163b62c13574ddee4e09513bfbc","src/codegen/math/float/cos.rs":"ef3b511a24d23045b310315e80348a9b7fedb576fc2de52d74290616a0abeb2a","src/codegen/math/float/cos_pi.rs":"4e7631a5d73dac21531e09ef1802d1180f8997509c2c8fa9f67f322194263a97","src/codegen/math/float/exp.rs":"61b691598c41b5622f24e4320c1bdd08701e612a516438bdddcc728fc3405c8c","src/codegen/math/float/ln.rs":"46b718b1ba8c9d99e1ad40f53d20dfde08a3063ca7bd2a9fdd6698e060da687e","src/codegen/math/float/macros.rs":"dd42135fff13f9aca4fd3a1a4e14c7e6c31aadc6d817d63b0d2fb9e62e062744","src/codegen/math/float/mul_add.rs":"a37bf764345d4b1714f97e83897b7cf0855fc2811704bcbc0012db91825339e1","src/codegen/math/float/mul_adde.rs":"c75702bfcb361de45964a93caf959a695ef2376bd069227600b8c6872665c755","src/codegen/math/float/powf.rs":"642346e982bc4c39203de0864d2149c4179cd7b21cf67a2951687932b4675872","src/codegen/math/float/sin.rs":"9d68164c90cdca6a85155040cdac42e27342ebe0b925273ef1593df721af4258","src/codegen/math/float/sin_cos_pi.rs":"9be02ad48585a1e8d99129382fbffbaed47852f15459256a708850b6b7a75405","src/codegen/math/float/sin_pi.rs":"9890347905b4d4a3c7341c3eb06406e46e60582bcf6960688bd727e5dadc6c57","src/codegen/math/float/sqrt.rs":"e3c60dcfb0c6d2fc62adabcc931b2d4040b83cab294dea36443fb4b89eb79e34","src/codegen/math/float/sqrte.rs":"f0f4ef9eb475ae41bcc7ec6a95ad744ba6b36925faa8b2c2814004396d196b63","src/codegen/pointer_sized_int.rs":"a70697169c28218b56fd2e8d5353f2e00671d1150d0c8cef77d613bdfacd84cb","src/codegen/reductions.rs":"645e2514746d01387ddd07f0aa4ffd8430cc9ab428d4fb13773ea319fa25dd95","src/codegen/reductions/mask.rs":"8f1afe6aabf096a3278e1fc3a30f736e04aa8b9ce96373cee22162d18cfe2702","src/codegen/reductions/mask/aarch64.rs":"cba6e17603d39795dcfe8339b6b7d8714c3e162a1f0a635979f03
7aa24fe4206","src/codegen/reductions/mask/arm.rs":"9447904818aa2c7c25d0963eead452a639a11ca7dbd6d21eedbfcaade07a0f33","src/codegen/reductions/mask/fallback.rs":"7a0ef9f7fd03ae318b495b95e121350cd61caffc5cc6ee17fabf130d5d933453","src/codegen/reductions/mask/fallback_impl.rs":"76547f396e55ef403327c77c314cf8db8c7a5c9b9819bfb925abeacf130249e5","src/codegen/reductions/mask/x86.rs":"14bd2c482071f2355beebcf7b7ecf950ff2dfcdb08c3ca50993092434a9de717","src/codegen/reductions/mask/x86/avx.rs":"b4913d87844c522903641cbbf10db4551addb1ce5e9e78278e21612fa65c733b","src/codegen/reductions/mask/x86/avx2.rs":"677aed3f056285285daa3adff8bc65e739630b4424defa6d9665e160f027507e","src/codegen/reductions/mask/x86/sse.rs":"226610b4ff88c676d5187114dd57b4a8800de6ce40884675e9198445b1ed0306","src/codegen/reductions/mask/x86/sse2.rs":"bc38e6c31cb4b3d62147eba6cac264e519e2a48e0f7ce9010cfa9ef0cf0ec9fd","src/codegen/shuffle.rs":"0abca97e92cdce49a58a39cc447eb09dc7d7715ef256c8dbd2181a186e61bb64","src/codegen/shuffle1_dyn.rs":"04523e9338133bdedb012dd076c2c564b79ce5593b0fc56d0fb6910e04190a81","src/codegen/swap_bytes.rs":"1d6cdc716eadddc92b4fd506b2445a821caa8dc00860447de09d7ebd69c2087f","src/codegen/v128.rs":"94226b31ec403d18d9d2fe06713f147c9c79e9b5f9105089088266313f843185","src/codegen/v16.rs":"ddec4ffb66b6f7aaffb9a1780c5ddba82557abd74f45073d335047e04cf74924","src/codegen/v256.rs":"6b63917f0444118d6b1595bff2045e59b97c4d24012bd575f69f1f0efc5a0241","src/codegen/v32.rs":"3477b3c5540aed86e61e2f5807dd31db947413cec9181c587d93ed6ec74f0eba","src/codegen/v512.rs":"5854f99d3aabc4cd42b28a20d9ce447756dc2ba024a409a69b6a8ae1f1842fc5","src/codegen/v64.rs":"e9e89caebfe63d10c0cbca61e4dfdba3b7e02ee0989170f80beed23237ddd950","src/codegen/vPtr.rs":"96d609a9eece4dcbbcc01ba0b8744d7f5958be12774176a2945bc676f4e6b5cb","src/codegen/vSize.rs":"eeee9858749aa82142b27bc120d1989bb74a6b82e1e4efbbeaccc9634dc9acfc","src/lib.rs":"1b5d419ff05ee0370d671810423ccc254708cc8d415c1dbac2a7a36be4bf63a8","src/masks.rs":"870f429967b2d7d5133f4d28d6c753fc
5cef0570b27b29d4e966a066d22d2d0e","src/sealed.rs":"ff7f0324276408ae8249941cfa32c90b8835a54d750896b683efea857af19db2","src/testing.rs":"1d3a7862ef625e235a5734ad7204e68d350f902c0695182b1f08a0552432416e","src/testing/macros.rs":"6378856d7a40ba5ec5c7c0dad6327d79f0c77266921c24296d10aed6c68e9b98","src/testing/utils.rs":"d6fd5a5017f1f85d9d99585754f8f6ad06fc3d683b34083543e67a7cc6c1772c","src/v128.rs":"18fe263c4aa28cd06461c7070b0269f69f4a2e75749b8f142a83dfdfe4d22bf5","src/v16.rs":"e5c663c9fb3547eaeac78a5f7db9969f4d8b5ec96112bf2954602fff11f0aebd","src/v256.rs":"68732cd688ad12a56d8b4f8ddf279f77bdfe1be2943c7dc0c1b4f1a76798aa0f","src/v32.rs":"785b22a1ccb4a41bb53dfeb0670f624c0ce42e6cdf62d1747e3283777a1c70bd","src/v512.rs":"d1337bfe07f06a8f37f8e8fa7d4315b9307476ee435ad80dd5269eaed564fbfa","src/v64.rs":"3077468d65125b8f085e9454c8b2463a4d5225697464ba6a1300f8799528fd4b","src/vPtr.rs":"c9a53f41f466e17b6648a4ce390fd8f4d3a848d440eb8a9a803a11608d76eb05","src/vSize.rs":"5c46d3e8c3ee5863d9b6e37e681f871386e0efc254d6d84ba711edb529ce7b3c","tests/endianness.rs":"541a144be017e3dd7da7c8ea49d907dc02538245e8c5f3deb5bd43da92c929e1"},"package":null} +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/.travis.yml b/third_party/rust/packed_simd/.travis.yml +new file mode 100644 +index 000000000000..8d8ed54ab737 +--- /dev/null ++++ b/third_party/rust/packed_simd/.travis.yml +@@ -0,0 +1,308 @@ ++language: rust ++sudo: false ++rust: nightly ++ ++stages: ++ - tools ++ - linux-tier1 ++ - osx-tier1 ++ - osx-tier2 ++ - linux-tier2 ++ - android ++ ++matrix: ++ fast_finish: true ++ include: ++ # Android: ++ - env: TARGET=x86_64-linux-android NOVERIFY=1 ++ name: "x86_64-unknown-linux-android + SSE2" ++ stage: android ++ - env: TARGET=arm-linux-androideabi ++ name: "arm-linux-androideabi" ++ stage: android ++ - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon" ++ name: "arm-linux-androideabi + NEON" ++ stage: android ++ - env: TARGET=aarch64-linux-android ++ name: 
"aarch64-unknown-linux-android" ++ stage: android ++ - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon" ++ name: "aarch64-unknown-linux-android + NEON" ++ stage: android ++ - env: TARGET="thumbv7neon-linux-androideabi" ++ name: "thumbv7neon-linux-androideabi" ++ stage: android ++ # Linux: ++ - env: TARGET=i586-unknown-linux-gnu ++ name: "i586-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse" ++ name: "i586-unknown-linux-gnu + SSE" ++ stage: linux-tier2 ++ - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse2" ++ name: "i586-unknown-linux-gnu + SSE2" ++ stage: linux-tier2 ++ - env: TARGET=i686-unknown-linux-gnu ++ name: "i686-unknown-linux-gnu + SSE2" ++ stage: linux-tier1 ++ - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2" ++ name: "i686-unknown-linux-gnu + SSE4.2" ++ stage: linux-tier1 ++ - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2" ++ name: "i686-unknown-linux-gnu + AVX2" ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu ++ name: "x86_64-unknown-linux-gnu + SSE2" ++ install: rustup component add rustfmt-preview ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2" ++ name: "x86_64-unknown-linux-gnu + SSE4.2" ++ install: rustup component add rustfmt-preview ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx" ++ name: "x86_64-unknown-linux-gnu + AVX" ++ install: rustup component add rustfmt-preview ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2" ++ name: "x86_64-unknown-linux-gnu + AVX2" ++ install: rustup component add rustfmt-preview ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu-emulated ++ name: "Intel SDE + SSE2" ++ install: true ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C 
target-feature=+sse4.2" ++ name: "Intel SDE + SSE4.2" ++ install: true ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx" ++ name: "Intel SDE + AVX" ++ install: true ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx2" ++ name: "Intel SDE + AVX2" ++ install: true ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx-512f" ++ name: "Intel SDE + AVX-512" ++ install: true ++ stage: linux-tier1 ++ - env: TARGET=arm-unknown-linux-gnueabi ++ name: "arm-unknown-linux-gnueabi" ++ stage: linux-tier2 ++ - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon" ++ name: "arm-unknown-linux-gnueabi + NEON" ++ stage: linux-tier2 ++ - env: TARGET=arm-unknown-linux-gnueabihf ++ name: "arm-unknown-linux-gnueabihf" ++ stage: linux-tier2 ++ - env: TARGET=arm-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+v7,+neon" ++ name: "arm-unknown-linux-gnueabihf + NEON" ++ stage: linux-tier2 ++ - env: TARGET=armv7-unknown-linux-gnueabihf ++ name: "armv7-unknown-linux-gnueabihf" ++ stage: linux-tier2 ++ - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon" ++ name: "armv7-unknown-linux-gnueabihf + NEON" ++ stage: linux-tier2 ++ - env: TARGET="thumbv7neon-unknown-linux-gnueabihf" ++ name: "thumbv7neon-unknown-linux-gnueabihf" ++ stage: linux-tier2 ++ - env: TARGET=aarch64-unknown-linux-gnu ++ name: "aarch64-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=aarch64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+neon" ++ name: "aarch64-unknown-linux-gnu + NEON" ++ stage: linux-tier2 ++ - env: TARGET=mips-unknown-linux-gnu ++ name: "mips-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=mipsel-unknown-linux-musl ++ name: "mipsel-unknown-linux-musl" ++ stage: linux-tier2 ++ - env: TARGET=mips64-unknown-linux-gnuabi64 ++ name: "mips64-unknown-linux-gnuabi64" ++ stage: 
linux-tier2 ++ - env: TARGET=mips64el-unknown-linux-gnuabi64 ++ name: "mips64el-unknown-linux-gnuabi64" ++ stage: linux-tier2 ++ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/18 ++ # env: TARGET=mips64el-unknown-linux-gnuabi64 RUSTFLAGS="-C target-feature=+msa -C target-cpu=mips64r6" ++ - env: TARGET=powerpc-unknown-linux-gnu ++ name: "powerpc-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=powerpc64-unknown-linux-gnu ++ name: "powerpc64-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=powerpc64le-unknown-linux-gnu ++ name: "powerpc64le-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec" ++ name: "powerpc64le-unknown-linux-gnu + ALTIVEC" ++ stage: linux-tier2 ++ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx" ++ name: "powerpc64le-unknown-linux-gnu + VSX" ++ stage: linux-tier2 ++ - env: TARGET=s390x-unknown-linux-gnu ++ name: "s390x-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=sparc64-unknown-linux-gnu ++ name: "sparc64-unknown-linux-gnu" ++ stage: linux-tier2 ++ # WebAssembly: ++ - env: TARGET=wasm32-unknown-unknown ++ name: "wasm32-unknown-unknown" ++ stage: osx-tier1 # For now ++ # MacOSX: ++ - os: osx ++ env: TARGET=i686-apple-darwin ++ name: "i686-apple-darwin + SSE2" ++ script: ci/run.sh ++ osx_image: xcode10 ++ stage: osx-tier1 ++ - os: osx ++ env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2" ++ name: "i686-apple-darwin + SSE4.2" ++ script: ci/run.sh ++ osx_image: xcode10 ++ stage: osx-tier1 ++ # Travis-CI OSX build bots do not support AVX2: ++ - os: osx ++ env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+avx" ++ name: "i686-apple-darwin + AVX" ++ script: ci/run.sh ++ osx_image: xcode10 ++ stage: osx-tier1 ++ - os: osx ++ env: TARGET=x86_64-apple-darwin ++ name: "x86_64-apple-darwin + SSE2" ++ install: true ++ script: ci/run.sh ++ osx_image: xcode10 ++ stage: osx-tier1 ++ - 
os: osx ++ env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2" ++ name: "x86_64-apple-darwin + SSE4.2" ++ install: true ++ script: ci/run.sh ++ osx_image: xcode10 ++ stage: osx-tier1 ++ # Travis-CI OSX build bots do not support AVX2: ++ - os: osx ++ env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+avx" ++ name: "x86_64-apple-darwin + AVX" ++ install: true ++ script: ci/run.sh ++ osx_image: xcode10 ++ stage: osx-tier1 ++ # *BSDs: ++ #- env: TARGET=i686-unknown-freebsd NORUN=1 ++ # script: ci/run.sh ++ #- env: TARGET=x86_64-unknown-freebsd NORUN=1 ++ # script: ci/run.sh ++ #- env: TARGET=x86_64-unknown-netbsd NORUN=1 ++ # script: ci/run.sh ++ # Solaris: ++ #- env: TARGET=x86_64-sun-solaris NORUN=1 ++ # script: ci/run.sh ++ # iOS: ++ - os: osx ++ env: TARGET=i386-apple-ios ++ name: "i386-apple-ios" ++ script: ci/run.sh ++ osx_image: xcode9.4 ++ stage: osx-tier2 ++ - os: osx ++ env: TARGET=x86_64-apple-ios ++ name: "x86_64-apple-ios + SSE2" ++ script: ci/run.sh ++ osx_image: xcode9.4 ++ stage: osx-tier2 ++ - os: osx ++ env: TARGET=armv7-apple-ios NORUN=1 ++ name: "armv7-apple-ios [Build only]" ++ script: ci/run.sh ++ osx_image: xcode9.4 ++ stage: osx-tier2 ++ - os: osx ++ env: TARGET=aarch64-apple-ios NORUN=1 ++ name: "aarch64-apple-ios [Build only]" ++ script: ci/run.sh ++ osx_image: xcode9.4 ++ stage: osx-tier2 ++ # BENCHMARKS: ++ - name: "Benchmarks - x86_64-unknown-linux-gnu" ++ install: TARGET=x86_64-unknown-linux-gnu ./ci/setup_benchmarks.sh ++ script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh ++ stage: tools ++ - name: "Benchmarks - x86_64-apple-darwin" ++ install: TARGET=x86_64-apple-darwin ./ci/setup_benchmarks.sh ++ script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh ++ os: osx ++ osx_image: xcode9.4 ++ stage: tools ++ # TOOLS: ++ - name: "Documentation" ++ install: cargo install mdbook ++ script: ci/dox.sh ++ stage: tools ++ - name: "rustfmt" ++ 
install: true ++ before_script: rustup component add rustfmt-preview ++ script: ci/all.sh check_fmt || true ++ stage: tools ++ - name: "clippy" ++ install: true ++ before_script: rustup component add clippy-preview ++ script: ci/all.sh clippy ++ stage: tools ++ ++ allow_failures: ++ # FIXME: ISPC cannot be found? ++ - name: "Benchmarks - x86_64-apple-darwin" ++ # FIXME: TBD ++ - env: TARGET=powerpc-unknown-linux-gnu ++ - env: TARGET=powerpc64-unknown-linux-gnu ++ - env: TARGET=powerpc64le-unknown-linux-gnu ++ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec" ++ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx" ++ #- env: TARGET=i686-unknown-freebsd NORUN=1 ++ #- env: TARGET=x86_64-unknown-freebsd NORUN=1 ++ #- env: TARGET=x86_64-unknown-netbsd NORUN=1 ++ #- env: TARGET=x86_64-sun-solaris NORUN=1 ++ ++ # FIXME: TBD ++ - env: TARGET=arm-linux-androideabi ++ - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon" ++ - env: TARGET=aarch64-linux-android ++ - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon" ++ ++ # FIXME: iOS ++ # https://github.com/rust-lang-nursery/packed_simd/issues/26 ++ - env: TARGET=i386-apple-ios ++ - env: TARGET=x86_64-apple-ios ++ ++ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/182 ++ - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon" ++ - env: TARGET=arm-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+v7,+neon" ++ - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon" ++ ++ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/183 ++ - env: TARGET=wasm32-unknown-unknown ++ ++install: travis_retry rustup target add $TARGET ++before_script: cargo generate-lockfile ++script: travis_wait 50 ci/run-docker.sh ++after_script: sleep 5 ++ ++env: ++ global: ++ secure: 
"lPHv7s6+AxQYNaFncycVFQt++Y1asQmMhOikQU1ztlP8CK7+hn2m98cg/euOJyzIOb2iJ3ZX4cGZkzw4lc59MQBByb1GtDbazQoUOzVDbVfe9BDD2f8JVoIFh1CMfjPKQ7Gg/rJqWlwrUlSd5GNxPCutKjY7qZhJuR6SQbJjlWaGN2Vd4fVCzKXz8fHRXgMEZS+d+CR4Nsrkb83J3Z4s5kSdJmhYxJ61AWjuzJVwUh4l3/HEYlSL5XXpuh5R2i7W16h1PlNdaTUgkZli1lHzO8+6Q8LzX9+XiLIEVX9lw3A2NdIKGz8E/+7Qs5oYOkwYhjROsDQxIK7xkSM30bQuN7cwMBybAVIyOPJkqXQ1dQyp83KSdsOj7JMyDDRvcEDLI6ehRlm5EcdH7YrReuboN81iUo0Sa7VsuUmgj5hjERCt9r30f9aWuitABai7vKRtjglg7Sp5CrEVPA4PQs6PqKCCRogoggbXJ/Z5Dyw/RZaXPeNR9+qIKN1Vjm9Gew1sRN2JK/3+vXTKtyJXH/uBxgJt4jQlbuShOJuF+BSfTF88sMe67a/357SSOIb4JkaCyd0flDCWYE8576kaHPlVVMT2peXee0LeRXm1e13nG3Na0t3LS/orJLPHOShNQGoDj7qAP5aEKggRya896JGwtvlaBHHTmSQh65G7cyNErZo=" ++branches: ++ only: ++ - staging # bors r+ ++ - trying # bors try ++ - master ++notifications: ++ email: ++ on_success: never +diff --git a/third_party/rust/packed_simd/Cargo.toml b/third_party/rust/packed_simd/Cargo.toml +new file mode 100644 +index 000000000000..3db9354c9407 +--- /dev/null ++++ b/third_party/rust/packed_simd/Cargo.toml +@@ -0,0 +1,42 @@ ++[package] ++name = "packed_simd" ++version = "0.3.3" ++authors = ["Gonzalo Brito Gadeschi "] ++description = "Portable Packed SIMD vectors" ++documentation = "https://docs.rs/crate/packed_simd/" ++homepage = "https://github.com/rust-lang-nursery/packed_simd" ++repository = "https://github.com/rust-lang-nursery/packed_simd" ++keywords = ["simd", "vector", "portability"] ++categories = ["hardware-support", "concurrency", "no-std", "data-structures"] ++license = "MIT/Apache-2.0" ++build = "build.rs" ++edition = "2018" ++ ++[badges] ++appveyor = { repository = "rust-lang-nursery/packed_simd" } ++travis-ci = { repository = "rust-lang-nursery/packed_simd" } ++codecov = { repository = "rust-lang-nursery/packed_simd" } ++is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/packed_simd" } ++is-it-maintained-open-issues = { repository = "rust-lang-nursery/packed_simd" } ++maintenance = { status = "experimental" } ++ 
++[dependencies] ++cfg-if = "^0.1.6" ++core_arch = { version = "^0.1.3", optional = true } ++ ++[features] ++default = [] ++into_bits = [] ++libcore_neon = [] ++ ++[dev-dependencies] ++paste = "^0.1.3" ++arrayvec = { version = "^0.4", default-features = false } ++ ++[target.'cfg(target_arch = "x86_64")'.dependencies.sleef-sys] ++version = "^0.1.2" ++optional = true ++ ++[target.wasm32-unknown-unknown.dev-dependencies] ++wasm-bindgen = "=0.2.19" ++wasm-bindgen-test = "=0.2.19" +\ No newline at end of file +diff --git a/third_party/rust/simd/LICENSE-APACHE b/third_party/rust/packed_simd/LICENSE-APACHE +similarity index 100% +rename from third_party/rust/simd/LICENSE-APACHE +rename to third_party/rust/packed_simd/LICENSE-APACHE +diff --git a/third_party/rust/simd/LICENSE-MIT b/third_party/rust/packed_simd/LICENSE-MIT +similarity index 93% +rename from third_party/rust/simd/LICENSE-MIT +rename to third_party/rust/packed_simd/LICENSE-MIT +index bf6c304f7774..39d4bdb5acd3 100644 +--- a/third_party/rust/simd/LICENSE-MIT ++++ b/third_party/rust/packed_simd/LICENSE-MIT +@@ -1,25 +1,25 @@ +-Copyright (c) 2014 Huon Wilson ++Copyright (c) 2014 The Rust Project Developers + + Permission is hereby granted, free of charge, to any + person obtaining a copy of this software and associated + documentation files (the "Software"), to deal in the + Software without restriction, including without + limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of + the Software, and to permit persons to whom the Software + is furnished to do so, subject to the following + conditions: + + The above copyright notice and this permission notice + shall be included in all copies or substantial portions + of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF + ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT + SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +-DEALINGS IN THE SOFTWARE. +\ No newline at end of file ++DEALINGS IN THE SOFTWARE. +diff --git a/third_party/rust/packed_simd/bors.toml b/third_party/rust/packed_simd/bors.toml +new file mode 100644 +index 000000000000..6d302dc85cf6 +--- /dev/null ++++ b/third_party/rust/packed_simd/bors.toml +@@ -0,0 +1,3 @@ ++status = [ ++ "continuous-integration/travis-ci/push" ++] +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/build.rs b/third_party/rust/packed_simd/build.rs +new file mode 100644 +index 000000000000..85639ff9d085 +--- /dev/null ++++ b/third_party/rust/packed_simd/build.rs +@@ -0,0 +1,8 @@ ++fn main() { ++ println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1"); ++ let target = std::env::var("TARGET") ++ .expect("TARGET environment variable not defined"); ++ if target.contains("neon") { ++ println!("cargo:rustc-cfg=libcore_neon"); ++ } ++} +diff --git a/third_party/rust/packed_simd/ci/all.sh b/third_party/rust/packed_simd/ci/all.sh +new file mode 100644 +index 000000000000..273562d4a9bb +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/all.sh +@@ -0,0 +1,71 @@ ++#!/usr/bin/env bash ++# ++# Performs an operation on all targets ++ ++set -ex ++ ++: "${1?The all.sh script requires one argument.}" ++ ++op=$1 ++ ++cargo_clean() { ++ cargo clean ++} ++ ++cargo_check_fmt() { ++ cargo fmt --all -- --check ++} ++ ++cargo_fmt() { ++ cargo fmt --all ++} ++ ++cargo_clippy() { ++ cargo clippy --all -- -D clippy::pedantic ++} ++ ++CMD="-1" ++ ++case $op in ++ clean*) ++ CMD=cargo_clean ++ ;; ++ check_fmt*) ++ CMD=cargo_check_fmt ++ ;; ++ fmt*) ++ CMD=cargo_fmt ++ ;; ++ clippy) ++ CMD=cargo_clippy ++ ;; ++ *) ++ echo "Unknown operation: \"${op}\"" ++ exit 1 ++ ;; ++esac ++ ++echo "Operation is: ${CMD}" ++ ++# On src/ 
++$CMD ++ ++# Check examples/ ++for dir in examples/*/ ++do ++ dir=${dir%*/} ++ ( ++ cd "${dir%*/}" ++ $CMD ++ ) ++done ++ ++( ++ cd verify/verify ++ $CMD ++) ++ ++( ++ cd micro_benchmarks ++ $CMD ++) +diff --git a/third_party/rust/packed_simd/ci/android-install-ndk.sh b/third_party/rust/packed_simd/ci/android-install-ndk.sh +new file mode 100644 +index 000000000000..818e78446ae8 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/android-install-ndk.sh +@@ -0,0 +1,37 @@ ++#!/usr/bin/env sh ++# Copyright 2016 The Rust Project Developers. See the COPYRIGHT ++# file at the top-level directory of this distribution and at ++# http://rust-lang.org/COPYRIGHT. ++# ++# Licensed under the Apache License, Version 2.0 or the MIT license ++# , at your ++# option. This file may not be copied, modified, or distributed ++# except according to those terms. ++ ++set -ex ++ ++curl --retry 5 -O https://dl.google.com/android/repository/android-ndk-r15b-linux-x86_64.zip ++unzip -q android-ndk-r15b-linux-x86_64.zip ++ ++case "$1" in ++ aarch64) ++ arch=arm64 ++ ;; ++ ++ i686) ++ arch=x86 ++ ;; ++ ++ *) ++ arch=$1 ++ ;; ++esac; ++ ++android-ndk-r15b/build/tools/make_standalone_toolchain.py \ ++ --unified-headers \ ++ --install-dir "/android/ndk-${1}" \ ++ --arch "${arch}" \ ++ --api 24 ++ ++rm -rf ./android-ndk-r15b-linux-x86_64.zip ./android-ndk-r15b +diff --git a/third_party/rust/packed_simd/ci/android-install-sdk.sh b/third_party/rust/packed_simd/ci/android-install-sdk.sh +new file mode 100644 +index 000000000000..6b5ac09ab04a +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/android-install-sdk.sh +@@ -0,0 +1,60 @@ ++#!/usr/bin/env sh ++# Copyright 2016 The Rust Project Developers. See the COPYRIGHT ++# file at the top-level directory of this distribution and at ++# http://rust-lang.org/COPYRIGHT. ++# ++# Licensed under the Apache License, Version 2.0 or the MIT license ++# , at your ++# option. 
This file may not be copied, modified, or distributed ++# except according to those terms. ++ ++set -ex ++ ++# Prep the SDK and emulator ++# ++# Note that the update process requires that we accept a bunch of licenses, and ++# we can't just pipe `yes` into it for some reason, so we take the same strategy ++# located in https://github.com/appunite/docker by just wrapping it in a script ++# which apparently magically accepts the licenses. ++ ++mkdir sdk ++curl --retry 5 https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip -O ++unzip -d sdk sdk-tools-linux-3859397.zip ++ ++case "$1" in ++ arm | armv7) ++ abi=armeabi-v7a ++ ;; ++ ++ aarch64) ++ abi=arm64-v8a ++ ;; ++ ++ i686) ++ abi=x86 ++ ;; ++ ++ x86_64) ++ abi=x86_64 ++ ;; ++ ++ *) ++ echo "invalid arch: $1" ++ exit 1 ++ ;; ++esac; ++ ++# --no_https avoids ++ # javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: No trusted certificate found ++yes | ./sdk/tools/bin/sdkmanager --licenses --no_https ++yes | ./sdk/tools/bin/sdkmanager --no_https \ ++ "emulator" \ ++ "platform-tools" \ ++ "platforms;android-24" \ ++ "system-images;android-24;default;$abi" ++ ++echo "no" | ++ ./sdk/tools/bin/avdmanager create avd \ ++ --name "${1}" \ ++ --package "system-images;android-24;default;$abi" +diff --git a/third_party/rust/packed_simd/ci/android-sysimage.sh b/third_party/rust/packed_simd/ci/android-sysimage.sh +new file mode 100644 +index 000000000000..9eabd7c8d94f +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/android-sysimage.sh +@@ -0,0 +1,56 @@ ++#!/usr/bin/env bash ++ ++# Copyright 2017 The Rust Project Developers. See the COPYRIGHT ++# file at the top-level directory of this distribution and at ++# http://rust-lang.org/COPYRIGHT. ++# ++# Licensed under the Apache License, Version 2.0 or the MIT license ++# , at your ++# option. This file may not be copied, modified, or distributed ++# except according to those terms. 
++ ++set -ex ++ ++URL=https://dl.google.com/android/repository/sys-img/android ++ ++main() { ++ local arch="${1}" ++ local name="${2}" ++ local dest=/system ++ local td ++ td="$(mktemp -d)" ++ ++ apt-get install --no-install-recommends e2tools ++ ++ pushd "${td}" ++ curl --retry 5 -O "${URL}/${name}" ++ unzip -q "${name}" ++ ++ local system ++ system="$(find . -name system.img)" ++ mkdir -p ${dest}/{bin,lib,lib64} ++ ++ # Extract android linker and libraries to /system ++ # This allows android executables to be run directly (or with qemu) ++ if [ "${arch}" = "x86_64" ] || [ "${arch}" = "arm64" ]; then ++ e2cp -p "${system}:/bin/linker64" "${dest}/bin/" ++ e2cp -p "${system}:/lib64/libdl.so" "${dest}/lib64/" ++ e2cp -p "${system}:/lib64/libc.so" "${dest}/lib64/" ++ e2cp -p "${system}:/lib64/libm.so" "${dest}/lib64/" ++ else ++ e2cp -p "${system}:/bin/linker" "${dest}/bin/" ++ e2cp -p "${system}:/lib/libdl.so" "${dest}/lib/" ++ e2cp -p "${system}:/lib/libc.so" "${dest}/lib/" ++ e2cp -p "${system}:/lib/libm.so" "${dest}/lib/" ++ fi ++ ++ # clean up ++ apt-get purge --auto-remove -y e2tools ++ ++ popd ++ ++ rm -rf "${td}" ++} ++ ++main "${@}" +diff --git a/third_party/rust/packed_simd/ci/benchmark.sh b/third_party/rust/packed_simd/ci/benchmark.sh +new file mode 100644 +index 000000000000..3635b9e371d1 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/benchmark.sh +@@ -0,0 +1,32 @@ ++#!/usr/bin/env bash ++# ++# Runs all benchmarks. Controlled by the following environment variables: ++# ++# FEATURES={} - cargo features to pass to all benchmarks (e.g. core_arch,sleef-sys,ispc) ++# NORUN={1} - only builds the benchmarks ++ ++set -ex ++ ++if [[ ${NORUN} != 1 ]]; then ++ # Most benchmarks require hyperfine; require it upfront. ++ hash hyperfine 2>/dev/null || { echo >&2 "hyperfine is not in PATH."; exit 1; } ++fi ++ ++ ++# If the ispc benchmark feature is enabled, ispc must be in the path of the ++# benchmarks. 
++if echo "$FEATURES" | grep -q "ispc"; then ++ hash ispc 2>/dev/null || { echo >&2 "ispc is not in PATH."; exit 1; } ++fi ++ ++# An example with a benchmark.sh is a benchmark: ++for dir in examples/*/ ++do ++ dir=${dir%*/} ++ cd ${dir%*/} ++ if [ -f "benchmark.sh" ]; then ++ ./benchmark.sh ++ fi ++ cd - ++done ++ +diff --git a/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs b/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs +new file mode 100644 +index 000000000000..c0fe52c35659 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs +@@ -0,0 +1,176 @@ ++// Copyright 2017 The Rust Project Developers. See the COPYRIGHT ++// file at the top-level directory of this distribution and at ++// http://rust-lang.org/COPYRIGHT. ++// ++// Licensed under the Apache License, Version 2.0 or the MIT license ++// , at your ++// option. This file may not be copied, modified, or distributed ++// except according to those terms. ++ ++// This is a script to deploy and execute a binary on an iOS simulator. ++// The primary use of this is to be able to run unit tests on the simulator and ++// retrieve the results. ++// ++// To do this through Cargo instead, use Dinghy ++// (https://github.com/snipsco/dinghy): cargo dinghy install, then cargo dinghy ++// test. ++ ++use std::env; ++use std::fs::{self, File}; ++use std::io::Write; ++use std::path::Path; ++use std::process; ++use std::process::Command; ++ ++macro_rules! 
t { ++ ($e:expr) => (match $e { ++ Ok(e) => e, ++ Err(e) => panic!("{} failed with: {}", stringify!($e), e), ++ }) ++} ++ ++// Step one: Wrap as an app ++fn package_as_simulator_app(crate_name: &str, test_binary_path: &Path) { ++ println!("Packaging simulator app"); ++ drop(fs::remove_dir_all("ios_simulator_app")); ++ t!(fs::create_dir("ios_simulator_app")); ++ t!(fs::copy(test_binary_path, ++ Path::new("ios_simulator_app").join(crate_name))); ++ ++ let mut f = t!(File::create("ios_simulator_app/Info.plist")); ++ t!(f.write_all(format!(r#" ++ ++ ++ ++ ++ CFBundleExecutable ++ {} ++ CFBundleIdentifier ++ com.rust.unittests ++ ++ ++ "#, crate_name).as_bytes())); ++} ++ ++// Step two: Start the iOS simulator ++fn start_simulator() { ++ println!("Looking for iOS simulator"); ++ let output = t!(Command::new("xcrun").arg("simctl").arg("list").output()); ++ assert!(output.status.success()); ++ let mut simulator_exists = false; ++ let mut simulator_booted = false; ++ let mut found_rust_sim = false; ++ let stdout = t!(String::from_utf8(output.stdout)); ++ for line in stdout.lines() { ++ if line.contains("rust_ios") { ++ if found_rust_sim { ++ panic!("Duplicate rust_ios simulators found. 
Please \ ++ double-check xcrun simctl list."); ++ } ++ simulator_exists = true; ++ simulator_booted = line.contains("(Booted)"); ++ found_rust_sim = true; ++ } ++ } ++ ++ if simulator_exists == false { ++ println!("Creating iOS simulator"); ++ Command::new("xcrun") ++ .arg("simctl") ++ .arg("create") ++ .arg("rust_ios") ++ .arg("com.apple.CoreSimulator.SimDeviceType.iPhone-SE") ++ .arg("com.apple.CoreSimulator.SimRuntime.iOS-10-2") ++ .check_status(); ++ } else if simulator_booted == true { ++ println!("Shutting down already-booted simulator"); ++ Command::new("xcrun") ++ .arg("simctl") ++ .arg("shutdown") ++ .arg("rust_ios") ++ .check_status(); ++ } ++ ++ println!("Starting iOS simulator"); ++ // We can't uninstall the app (if present) as that will hang if the ++ // simulator isn't completely booted; just erase the simulator instead. ++ Command::new("xcrun").arg("simctl").arg("erase").arg("rust_ios").check_status(); ++ Command::new("xcrun").arg("simctl").arg("boot").arg("rust_ios").check_status(); ++} ++ ++// Step three: Install the app ++fn install_app_to_simulator() { ++ println!("Installing app to simulator"); ++ Command::new("xcrun") ++ .arg("simctl") ++ .arg("install") ++ .arg("booted") ++ .arg("ios_simulator_app/") ++ .check_status(); ++} ++ ++// Step four: Run the app ++fn run_app_on_simulator() { ++ println!("Running app"); ++ let output = t!(Command::new("xcrun") ++ .arg("simctl") ++ .arg("launch") ++ .arg("--console") ++ .arg("booted") ++ .arg("com.rust.unittests") ++ .output()); ++ ++ println!("stdout --\n{}\n", String::from_utf8_lossy(&output.stdout)); ++ println!("stderr --\n{}\n", String::from_utf8_lossy(&output.stderr)); ++ ++ let stdout = String::from_utf8_lossy(&output.stdout); ++ let failed = stdout.lines() ++ .find(|l| l.contains("FAILED")) ++ .map(|l| l.contains("FAILED")) ++ .unwrap_or(false); ++ ++ let passed = stdout.lines() ++ .find(|l| l.contains("test result: ok")) ++ .map(|l| l.contains("test result: ok")) ++ .unwrap_or(false); ++ ++ 
println!("Shutting down simulator"); ++ Command::new("xcrun") ++ .arg("simctl") ++ .arg("shutdown") ++ .arg("rust_ios") ++ .check_status(); ++ if !(passed && !failed) { ++ panic!("tests didn't pass"); ++ } ++} ++ ++trait CheckStatus { ++ fn check_status(&mut self); ++} ++ ++impl CheckStatus for Command { ++ fn check_status(&mut self) { ++ println!("\trunning: {:?}", self); ++ assert!(t!(self.status()).success()); ++ } ++} ++ ++fn main() { ++ let args: Vec = env::args().collect(); ++ if args.len() != 2 { ++ println!("Usage: {} ", args[0]); ++ process::exit(-1); ++ } ++ ++ let test_binary_path = Path::new(&args[1]); ++ let crate_name = test_binary_path.file_name().unwrap(); ++ ++ package_as_simulator_app(crate_name.to_str().unwrap(), test_binary_path); ++ start_simulator(); ++ install_app_to_simulator(); ++ run_app_on_simulator(); ++} +diff --git a/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile b/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile +new file mode 100644 +index 000000000000..27bde89c5a8d +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile +@@ -0,0 +1,47 @@ ++FROM ubuntu:16.04 ++ ++RUN dpkg --add-architecture i386 && \ ++ apt-get update && \ ++ apt-get install -y --no-install-recommends \ ++ file \ ++ make \ ++ curl \ ++ ca-certificates \ ++ python \ ++ unzip \ ++ expect \ ++ openjdk-9-jre \ ++ libstdc++6:i386 \ ++ libpulse0 \ ++ gcc \ ++ libc6-dev ++ ++WORKDIR /android/ ++COPY android* /android/ ++ ++ENV ANDROID_ARCH=aarch64 ++ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools ++ ++RUN sh /android/android-install-ndk.sh $ANDROID_ARCH ++RUN sh /android/android-install-sdk.sh $ANDROID_ARCH ++RUN mv /root/.android /tmp ++RUN chmod 777 -R /tmp/.android ++RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/* ++ ++ENV PATH=$PATH:/rust/bin \ ++ 
CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER=aarch64-linux-android-gcc \ ++ CARGO_TARGET_AARCH64_LINUX_ANDROID_RUNNER=/tmp/runtest \ ++ OBJDUMP=aarch64-linux-android-objdump \ ++ HOME=/tmp ++ ++ADD runtest-android.rs /tmp/runtest.rs ++ENTRYPOINT [ \ ++ "bash", \ ++ "-c", \ ++ # set SHELL so android can detect a 64bits system, see ++ # http://stackoverflow.com/a/41789144 ++ "SHELL=/bin/dash /android/sdk/emulator/emulator @aarch64 -no-window & \ ++ rustc /tmp/runtest.rs -o /tmp/runtest && \ ++ exec \"$@\"", \ ++ "--" \ ++] +diff --git a/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..68261a2f033d +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,14 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ ca-certificates \ ++ libc6-dev \ ++ gcc-aarch64-linux-gnu \ ++ libc6-dev-arm64-cross \ ++ qemu-user \ ++ make \ ++ file ++ ++ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ ++ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \ ++ OBJDUMP=aarch64-linux-gnu-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile +new file mode 100644 +index 000000000000..995a9e30e65e +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile +@@ -0,0 +1,47 @@ ++FROM ubuntu:16.04 ++ ++RUN dpkg --add-architecture i386 && \ ++ apt-get update && \ ++ apt-get install -y --no-install-recommends \ ++ file \ ++ make \ ++ curl \ ++ ca-certificates \ ++ python \ ++ unzip \ ++ expect \ ++ openjdk-9-jre \ ++ libstdc++6:i386 \ ++ libpulse0 \ ++ gcc \ ++ libc6-dev ++ ++WORKDIR /android/ ++COPY android* /android/ ++ ++ENV ANDROID_ARCH=arm ++ENV 
PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools ++ ++RUN sh /android/android-install-ndk.sh $ANDROID_ARCH ++RUN sh /android/android-install-sdk.sh $ANDROID_ARCH ++RUN mv /root/.android /tmp ++RUN chmod 777 -R /tmp/.android ++RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/* ++ ++ENV PATH=$PATH:/rust/bin \ ++ CARGO_TARGET_ARM_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \ ++ CARGO_TARGET_ARM_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \ ++ OBJDUMP=arm-linux-androideabi-objdump \ ++ HOME=/tmp ++ ++ADD runtest-android.rs /tmp/runtest.rs ++ENTRYPOINT [ \ ++ "bash", \ ++ "-c", \ ++ # set SHELL so android can detect a 64bits system, see ++ # http://stackoverflow.com/a/41789144 ++ "SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \ ++ rustc /tmp/runtest.rs -o /tmp/runtest && \ ++ exec \"$@\"", \ ++ "--" \ ++] +diff --git a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile +new file mode 100644 +index 000000000000..cb4de6a57eaa +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile +@@ -0,0 +1,15 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ ca-certificates \ ++ libc6-dev \ ++ libc6-armel-cross \ ++ libc6-dev-armel-cross \ ++ binutils-arm-linux-gnueabi \ ++ gcc-arm-linux-gnueabi \ ++ qemu-user \ ++ make \ ++ file ++ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc \ ++ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER="qemu-arm -L /usr/arm-linux-gnueabi" \ ++ OBJDUMP=arm-linux-gnueabi-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +new file mode 100644 +index 000000000000..c7bd61f0a796 +--- /dev/null ++++ 
b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +@@ -0,0 +1,13 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ ca-certificates \ ++ libc6-dev \ ++ gcc-arm-linux-gnueabihf \ ++ libc6-dev-armhf-cross \ ++ qemu-user \ ++ make \ ++ file ++ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ ++ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \ ++ OBJDUMP=arm-linux-gnueabihf-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +new file mode 100644 +index 000000000000..e01b87afdf56 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +@@ -0,0 +1,13 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ ca-certificates \ ++ libc6-dev \ ++ gcc-arm-linux-gnueabihf \ ++ libc6-dev-armhf-cross \ ++ qemu-user \ ++ make \ ++ file ++ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ ++ CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \ ++ OBJDUMP=arm-linux-gnueabihf-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..857974a858f1 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,7 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc-multilib \ ++ libc6-dev \ ++ file \ ++ make \ ++ ca-certificates +diff --git a/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile +new file mode 100644 
+index 000000000000..857974a858f1 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,7 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc-multilib \ ++ libc6-dev \ ++ file \ ++ make \ ++ ca-certificates +diff --git a/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..4711cead372a +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,13 @@ ++FROM ubuntu:17.10 ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc libc6-dev qemu-user ca-certificates \ ++ gcc-mips-linux-gnu libc6-dev-mips-cross \ ++ qemu-system-mips \ ++ qemu-user \ ++ make \ ++ file ++ ++ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \ ++ CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \ ++ OBJDUMP=mips-linux-gnu-objdump +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile +new file mode 100644 +index 000000000000..1422e8c80924 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile +@@ -0,0 +1,10 @@ ++FROM ubuntu:17.10 ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc libc6-dev qemu-user ca-certificates \ ++ gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \ ++ qemu-system-mips64 qemu-user ++ ++ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \ ++ CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \ ++ OBJDUMP=mips64-linux-gnuabi64-objdump +\ No newline at end of file +diff --git 
a/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile +new file mode 100644 +index 000000000000..d94deb5b2013 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile +@@ -0,0 +1,10 @@ ++FROM ubuntu:17.10 ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc libc6-dev qemu-user ca-certificates \ ++ gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross \ ++ qemu-system-mips64el ++ ++ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \ ++ CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64" \ ++ OBJDUMP=mips64el-linux-gnuabi64-objdump +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile b/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile +new file mode 100644 +index 000000000000..40ac50675bd9 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile +@@ -0,0 +1,25 @@ ++FROM ubuntu:18.10 ++ ++RUN apt-get update && \ ++ apt-get install -y --no-install-recommends \ ++ ca-certificates \ ++ gcc \ ++ libc6-dev \ ++ make \ ++ qemu-user \ ++ qemu-system-mips \ ++ bzip2 \ ++ curl \ ++ file ++ ++RUN mkdir /toolchain ++ ++# Note that this originally came from: ++# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 ++RUN curl -L https://s3-us-west-1.amazonaws.com/rust-lang-ci2/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \ ++ tar xjf - -C /toolchain --strip-components=2 ++ ++ENV PATH=$PATH:/rust/bin:/toolchain/bin \ ++ CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \ ++ CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \ ++ 
CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain" +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..43b174ed87fc +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,12 @@ ++FROM ubuntu:17.10 ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc libc6-dev qemu-user ca-certificates \ ++ gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \ ++ qemu-system-ppc \ ++ make \ ++ file ++ ++ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \ ++ CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu Vger -L /usr/powerpc-linux-gnu" \ ++ OBJDUMP=powerpc-linux-gnu-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..7757ad28a42d +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,17 @@ ++FROM ubuntu:17.10 ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ ca-certificates \ ++ libc6-dev \ ++ gcc-powerpc64-linux-gnu \ ++ libc6-dev-ppc64-cross \ ++ qemu-user \ ++ qemu-system-ppc \ ++ make \ ++ file ++ ++ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \ ++ CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -L /usr/powerpc64-linux-gnu" \ ++ CC=powerpc64-linux-gnu-gcc \ ++ OBJDUMP=powerpc64-linux-gnu-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..0b0c214fdf1b +--- /dev/null ++++ 
b/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,11 @@ ++FROM ubuntu:17.10 ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc libc6-dev qemu-user ca-certificates \ ++ gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \ ++ qemu-system-ppc file make ++ ++ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \ ++ CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -L /usr/powerpc64le-linux-gnu" \ ++ CC=powerpc64le-linux-gnu-gcc \ ++ OBJDUMP=powerpc64le-linux-gnu-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..c645b0bcc2b8 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,20 @@ ++FROM ubuntu:18.10 ++ ++RUN apt-get update && \ ++ apt-get install -y --no-install-recommends \ ++ ca-certificates \ ++ curl \ ++ cmake \ ++ gcc \ ++ libc6-dev \ ++ g++-s390x-linux-gnu \ ++ libc6-dev-s390x-cross \ ++ qemu-user \ ++ make \ ++ file ++ ++ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc \ ++ CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -L /usr/s390x-linux-gnu" \ ++ CC_s390x_unknown_linux_gnu=s390x-linux-gnu-gcc \ ++ CXX_s390x_unknown_linux_gnu=s390x-linux-gnu-g++ \ ++ OBJDUMP=s390x-linux-gnu-objdump +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..fe12af14da6f +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,18 @@ ++FROM debian:stretch ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ curl ca-certificates \ ++ gcc libc6-dev \ ++ gcc-sparc64-linux-gnu 
libc6-dev-sparc64-cross \ ++ qemu-system-sparc64 openbios-sparc seabios ipxe-qemu \ ++ p7zip-full cpio ++ ++COPY linux-sparc64.sh / ++RUN bash /linux-sparc64.sh ++ ++COPY test-runner-linux / ++ ++ENV CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_LINKER=sparc64-linux-gnu-gcc \ ++ CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_RUNNER="/test-runner-linux sparc64" \ ++ CC_sparc64_unknown_linux_gnu=sparc64-linux-gnu-gcc \ ++ PATH=$PATH:/rust/bin +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile +new file mode 100644 +index 000000000000..c1da77109c12 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile +@@ -0,0 +1,47 @@ ++FROM ubuntu:16.04 ++ ++RUN dpkg --add-architecture i386 && \ ++ apt-get update && \ ++ apt-get install -y --no-install-recommends \ ++ file \ ++ make \ ++ curl \ ++ ca-certificates \ ++ python \ ++ unzip \ ++ expect \ ++ openjdk-9-jre \ ++ libstdc++6:i386 \ ++ libpulse0 \ ++ gcc \ ++ libc6-dev ++ ++WORKDIR /android/ ++COPY android* /android/ ++ ++ENV ANDROID_ARCH=arm ++ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools ++ ++RUN sh /android/android-install-ndk.sh $ANDROID_ARCH ++RUN sh /android/android-install-sdk.sh $ANDROID_ARCH ++RUN mv /root/.android /tmp ++RUN chmod 777 -R /tmp/.android ++RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/* ++ ++ENV PATH=$PATH:/rust/bin \ ++ CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \ ++ CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \ ++ OBJDUMP=arm-linux-androideabi-objdump \ ++ HOME=/tmp ++ ++ADD runtest-android.rs /tmp/runtest.rs ++ENTRYPOINT [ \ ++ "bash", \ ++ "-c", \ ++ # set SHELL so android can detect a 64bits system, see ++ # http://stackoverflow.com/a/41789144 ++ "SHELL=/bin/dash 
/android/sdk/emulator/emulator @arm -no-window & \ ++ rustc /tmp/runtest.rs -o /tmp/runtest && \ ++ exec \"$@\"", \ ++ "--" \ ++] +diff --git a/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile +new file mode 100644 +index 000000000000..696cb6c3fb52 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile +@@ -0,0 +1,13 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ ca-certificates \ ++ libc6-dev \ ++ gcc-arm-linux-gnueabihf \ ++ libc6-dev-armhf-cross \ ++ qemu-user \ ++ make \ ++ file ++ENV CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ ++ CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \ ++ OBJDUMP=arm-linux-gnueabihf-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile b/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile +new file mode 100644 +index 000000000000..f905cf1a36eb +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile +@@ -0,0 +1,37 @@ ++FROM ubuntu:18.04 ++ ++RUN apt-get update -y && apt-get install -y --no-install-recommends \ ++ ca-certificates \ ++ clang \ ++ cmake \ ++ curl \ ++ git \ ++ libc6-dev \ ++ make \ ++ python \ ++ xz-utils ++ ++# Install `wasm2wat` ++RUN git clone --recursive https://github.com/WebAssembly/wabt ++RUN make -C wabt -j$(nproc) ++ENV PATH=$PATH:/wabt/bin ++ ++# Install `wasm-bindgen-test-runner` ++RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.19/wasm-bindgen-0.2.19-x86_64-unknown-linux-musl.tar.gz \ ++ | tar xzf - ++ENV PATH=$PATH:/wasm-bindgen-0.2.19-x86_64-unknown-linux-musl ++ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner ++ ++# Install `node` ++RUN curl 
https://nodejs.org/dist/v10.8.0/node-v10.8.0-linux-x64.tar.xz | tar xJf - ++ENV PATH=$PATH:/node-v10.8.0-linux-x64/bin ++ ++# We use a shim linker that removes `--strip-debug` when passed to LLD. While ++# this typically results in invalid debug information in release mode it doesn't ++# result in an invalid names section which is what we're interested in. ++COPY lld-shim.rs / ++ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_LINKER=/tmp/lld-shim ++ ++# Rustc isn't available until this container starts, so defer compilation of the ++# shim. ++ENTRYPOINT /rust/bin/rustc /lld-shim.rs -o /tmp/lld-shim && exec bash "$@" +diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile +new file mode 100644 +index 000000000000..d52dd45b12bf +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile +@@ -0,0 +1,29 @@ ++FROM ubuntu:16.04 ++ ++RUN apt-get update && \ ++ apt-get install -y --no-install-recommends \ ++ ca-certificates \ ++ curl \ ++ gcc \ ++ libc-dev \ ++ python \ ++ unzip \ ++ file \ ++ make ++ ++WORKDIR /android/ ++ENV ANDROID_ARCH=x86_64 ++COPY android-install-ndk.sh /android/ ++RUN sh /android/android-install-ndk.sh $ANDROID_ARCH ++ ++# We do not run x86_64-linux-android tests on an android emulator. ++# See ci/android-sysimage.sh for informations about how tests are run. 
++COPY android-sysimage.sh /android/ ++RUN bash /android/android-sysimage.sh x86_64 x86_64-24_r07.zip ++ ++ENV PATH=$PATH:/rust/bin:/android/ndk-$ANDROID_ARCH/bin \ ++ CARGO_TARGET_X86_64_LINUX_ANDROID_LINKER=x86_64-linux-android-gcc \ ++ CC_x86_64_linux_android=x86_64-linux-android-gcc \ ++ CXX_x86_64_linux_android=x86_64-linux-android-g++ \ ++ OBJDUMP=x86_64-linux-android-objdump \ ++ HOME=/tmp +diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile +new file mode 100644 +index 000000000000..a6bbe6653928 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile +@@ -0,0 +1,16 @@ ++FROM ubuntu:18.04 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ libc6-dev \ ++ file \ ++ make \ ++ ca-certificates \ ++ wget \ ++ bzip2 \ ++ cmake \ ++ libclang-dev \ ++ clang ++ ++RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.16.0-2018-01-30-lin.tar.bz2 ++RUN tar -xjf sde-external-8.16.0-2018-01-30-lin.tar.bz2 ++ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.16.0-2018-01-30-lin/sde64 --" +diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..e6b000d0516e +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,10 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ libc6-dev \ ++ file \ ++ make \ ++ ca-certificates \ ++ cmake \ ++ libclang-dev \ ++ clang +diff --git a/third_party/rust/packed_simd/ci/dox.sh b/third_party/rust/packed_simd/ci/dox.sh +new file mode 100644 +index 000000000000..1743366407e3 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/dox.sh +@@ -0,0 +1,24 @@ 
++#!/bin/sh ++ ++set -ex ++ ++rm -rf target/doc ++mkdir -p target/doc ++ ++# Build API documentation ++cargo doc --features=into_bits ++ ++# Build Performance Guide ++# FIXME: https://github.com/rust-lang-nursery/mdBook/issues/780 ++# mdbook build perf-guide -d target/doc/perf-guide ++cd perf-guide ++mdbook build ++cd - ++cp -r perf-guide/book target/doc/perf-guide ++ ++# If we're on travis, not a PR, and on the right branch, publish! ++if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$TRAVIS_BRANCH" = "master" ]; then ++ pip install ghp_import --install-option="--prefix=$HOME/.local" ++ $HOME/.local/bin/ghp-import -n target/doc ++ git push -qf https://${GH_PAGES}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages ++fi +diff --git a/third_party/rust/packed_simd/ci/linux-s390x.sh b/third_party/rust/packed_simd/ci/linux-s390x.sh +new file mode 100644 +index 000000000000..972abeec569e +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/linux-s390x.sh +@@ -0,0 +1,18 @@ ++set -ex ++ ++mkdir -m 777 /qemu ++cd /qemu ++ ++curl -LO https://github.com/qemu/qemu/raw/master/pc-bios/s390-ccw.img ++curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/kernel.debian ++curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/initrd.debian ++ ++mv kernel.debian kernel ++mv initrd.debian initrd.gz ++ ++mkdir init ++cd init ++gunzip -c ../initrd.gz | cpio -id ++rm ../initrd.gz ++cp /usr/s390x-linux-gnu/lib/libgcc_s.so.1 usr/lib/ ++chmod a+w . 
+diff --git a/third_party/rust/packed_simd/ci/linux-sparc64.sh b/third_party/rust/packed_simd/ci/linux-sparc64.sh +new file mode 100644 +index 000000000000..4452b120e1b6 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/linux-sparc64.sh +@@ -0,0 +1,17 @@ ++set -ex ++ ++mkdir -m 777 /qemu ++cd /qemu ++ ++curl -LO https://cdimage.debian.org/cdimage/ports/9.0/sparc64/iso-cd/debian-9.0-sparc64-NETINST-1.iso ++7z e debian-9.0-sparc64-NETINST-1.iso boot/initrd.gz ++7z e debian-9.0-sparc64-NETINST-1.iso boot/sparc64 ++mv sparc64 kernel ++rm debian-9.0-sparc64-NETINST-1.iso ++ ++mkdir init ++cd init ++gunzip -c ../initrd.gz | cpio -id ++rm ../initrd.gz ++cp /usr/sparc64-linux-gnu/lib/libgcc_s.so.1 usr/lib/ ++chmod a+w . +diff --git a/third_party/rust/packed_simd/ci/lld-shim.rs b/third_party/rust/packed_simd/ci/lld-shim.rs +new file mode 100644 +index 000000000000..10263869e8dc +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/lld-shim.rs +@@ -0,0 +1,11 @@ ++use std::os::unix::prelude::*; ++use std::process::Command; ++use std::env; ++ ++fn main() { ++ let args = env::args() ++ .skip(1) ++ .filter(|s| s != "--strip-debug") ++ .collect::>(); ++ panic!("failed to exec: {}", Command::new("rust-lld").args(&args).exec()); ++} +diff --git a/third_party/rust/packed_simd/ci/max_line_width.sh b/third_party/rust/packed_simd/ci/max_line_width.sh +new file mode 100644 +index 000000000000..f70639b6f89b +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/max_line_width.sh +@@ -0,0 +1,17 @@ ++#!/usr/bin/env sh ++ ++set -x ++ ++export success=true ++ ++find . 
-iname '*.rs' | while read -r file; do ++ result=$(grep '.\{79\}' "${file}" | grep --invert 'http') ++ if [ "${result}" = "" ] ++ then ++ : ++ else ++ echo "file \"${file}\": $result" ++ exit 1 ++ fi ++done ++ +diff --git a/third_party/rust/packed_simd/ci/run-docker.sh b/third_party/rust/packed_simd/ci/run-docker.sh +new file mode 100644 +index 000000000000..abdd6852fc3a +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/run-docker.sh +@@ -0,0 +1,38 @@ ++# Small script to run tests for a target (or all targets) inside all the ++# respective docker images. ++ ++set -ex ++ ++run() { ++ echo "Building docker container for TARGET=${TARGET} RUSTFLAGS=${RUSTFLAGS}" ++ docker build -t packed_simd -f ci/docker/${TARGET}/Dockerfile ci/ ++ mkdir -p target ++ target=$(echo "${TARGET}" | sed 's/-emulated//') ++ echo "Running docker" ++ docker run \ ++ --user `id -u`:`id -g` \ ++ --rm \ ++ --init \ ++ --volume $HOME/.cargo:/cargo \ ++ --env CARGO_HOME=/cargo \ ++ --volume `rustc --print sysroot`:/rust:ro \ ++ --env TARGET=$target \ ++ --env NORUN \ ++ --env NOVERIFY \ ++ --env RUSTFLAGS \ ++ --volume `pwd`:/checkout:ro \ ++ --volume `pwd`/target:/checkout/target \ ++ --workdir /checkout \ ++ --privileged \ ++ packed_simd \ ++ bash \ ++ -c 'PATH=$PATH:/rust/bin exec ci/run.sh' ++} ++ ++if [ -z "${TARGET}" ]; then ++ for d in `ls ci/docker/`; do ++ run $d ++ done ++else ++ run ${TARGET} ++fi +diff --git a/third_party/rust/packed_simd/ci/run.sh b/third_party/rust/packed_simd/ci/run.sh +new file mode 100644 +index 000000000000..7bb825883680 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/run.sh +@@ -0,0 +1,96 @@ ++#!/usr/bin/env bash ++ ++set -ex ++ ++: ${TARGET?"The TARGET environment variable must be set."} ++ ++# Tests are all super fast anyway, and they fault often enough on travis that ++# having only one thread increases debuggability to be worth it. 
++#export RUST_TEST_THREADS=1 ++#export RUST_BACKTRACE=full ++#export RUST_TEST_NOCAPTURE=1 ++ ++# Some appveyor builds run out-of-memory; this attempts to mitigate that: ++# https://github.com/rust-lang-nursery/packed_simd/issues/39 ++# export RUSTFLAGS="${RUSTFLAGS} -C codegen-units=1" ++# export CARGO_BUILD_JOBS=1 ++ ++export CARGO_SUBCMD=test ++if [[ "${NORUN}" == "1" ]]; then ++ export CARGO_SUBCMD=build ++fi ++ ++if [[ ${TARGET} == "x86_64-apple-ios" ]] || [[ ${TARGET} == "i386-apple-ios" ]]; then ++ export RUSTFLAGS="${RUSTFLAGS} -Clink-arg=-mios-simulator-version-min=7.0" ++ rustc ./ci/deploy_and_run_on_ios_simulator.rs -o $HOME/runtest ++ export CARGO_TARGET_X86_64_APPLE_IOS_RUNNER=$HOME/runtest ++ export CARGO_TARGET_I386_APPLE_IOS_RUNNER=$HOME/runtest ++fi ++ ++# The source directory is read-only. Need to copy internal crates to the target ++# directory for their Cargo.lock to be properly written. ++mkdir target || true ++ ++rustc --version ++cargo --version ++echo "TARGET=${TARGET}" ++echo "HOST=${HOST}" ++echo "RUSTFLAGS=${RUSTFLAGS}" ++echo "NORUN=${NORUN}" ++echo "NOVERIFY=${NOVERIFY}" ++echo "CARGO_SUBCMD=${CARGO_SUBCMD}" ++echo "CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS}" ++echo "CARGO_INCREMENTAL=${CARGO_INCREMENTAL}" ++echo "RUST_TEST_THREADS=${RUST_TEST_THREADS}" ++echo "RUST_BACKTRACE=${RUST_BACKTRACE}" ++echo "RUST_TEST_NOCAPTURE=${RUST_TEST_NOCAPTURE}" ++ ++cargo_test() { ++ cmd="cargo ${CARGO_SUBCMD} --verbose --target=${TARGET} ${@}" ++ if [ "${NORUN}" != "1" ] ++ then ++ if [ "$TARGET" != "wasm32-unknown-unknown" ] ++ then ++ cmd="$cmd -- --quiet" ++ fi ++ fi ++ mkdir target || true ++ ${cmd} 2>&1 | tee > target/output ++ if [[ ${PIPESTATUS[0]} != 0 ]]; then ++ cat target/output ++ return 1 ++ fi ++} ++ ++cargo_test_impl() { ++ ORIGINAL_RUSTFLAGS=${RUSTFLAGS} ++ RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v16 --cfg test_v32 --cfg test_v64" cargo_test ${@} ++ RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v128 --cfg test_v256" cargo_test ${@} ++ 
RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v512" cargo_test ${@} ++ RUSTFLAGS=${ORIGINAL_RUSTFLAGS} ++} ++ ++# Debug run: ++if [[ "${TARGET}" != "wasm32-unknown-unknown" ]]; then ++ # Run wasm32-unknown-unknown in release mode only ++ cargo_test_impl ++fi ++ ++if [[ "${TARGET}" == "x86_64-unknown-linux-gnu" ]] || [[ "${TARGET}" == "x86_64-pc-windows-msvc" ]]; then ++ # use sleef on linux and windows x86_64 builds ++ cargo_test_impl --release --features=into_bits,core_arch,sleef-sys ++else ++ cargo_test_impl --release --features=into_bits,core_arch ++fi ++ ++# Verify code generation ++if [[ "${NOVERIFY}" != "1" ]]; then ++ cp -r verify/verify target/verify ++ export STDSIMD_ASSERT_INSTR_LIMIT=30 ++ if [[ "${TARGET}" == "i586-unknown-linux-gnu" ]]; then ++ export STDSIMD_ASSERT_INSTR_LIMIT=50 ++ fi ++ cargo_test --release --manifest-path=target/verify/Cargo.toml ++fi ++ ++. ci/run_examples.sh +diff --git a/third_party/rust/packed_simd/ci/run_examples.sh b/third_party/rust/packed_simd/ci/run_examples.sh +new file mode 100644 +index 000000000000..5b26b18afb20 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/run_examples.sh +@@ -0,0 +1,51 @@ ++# Runs all examples. ++ ++# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/55 ++# All examples fail to build for `armv7-apple-ios`. 
++if [[ ${TARGET} == "armv7-apple-ios" ]]; then ++ exit 0 ++fi ++ ++# FIXME: travis exceeds 50 minutes on these targets ++# Skipping the examples is an attempt at preventing travis from timing-out ++if [[ ${TARGET} == "arm-linux-androidabi" ]] || [[ ${TARGET} == "aarch64-linux-androidabi" ]] \ ++ || [[ ${TARGET} == "sparc64-unknown-linux-gnu" ]]; then ++ exit 0 ++fi ++ ++if [[ ${TARGET} == "wasm32-unknown-unknown" ]]; then ++ exit 0 ++fi ++ ++cp -r examples/aobench target/aobench ++cargo_test --manifest-path=target/aobench/Cargo.toml --release --no-default-features ++cargo_test --manifest-path=target/aobench/Cargo.toml --release --features=256bit ++ ++cp -r examples/dot_product target/dot_product ++cargo_test --manifest-path=target/dot_product/Cargo.toml --release ++ ++cp -r examples/fannkuch_redux target/fannkuch_redux ++cargo_test --manifest-path=target/fannkuch_redux/Cargo.toml --release ++ ++# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/56 ++if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then ++ cp -r examples/mandelbrot target/mandelbrot ++ cargo_test --manifest-path=target/mandelbrot/Cargo.toml --release ++fi ++ ++cp -r examples/matrix_inverse target/matrix_inverse ++cargo_test --manifest-path=target/matrix_inverse/Cargo.toml --release ++ ++cp -r examples/nbody target/nbody ++cargo_test --manifest-path=target/nbody/Cargo.toml --release ++ ++cp -r examples/spectral_norm target/spectral_norm ++cargo_test --manifest-path=target/spectral_norm/Cargo.toml --release ++ ++if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then ++ cp -r examples/stencil target/stencil ++ cargo_test --manifest-path=target/stencil/Cargo.toml --release ++fi ++ ++cp -r examples/triangle_xform target/triangle_xform ++cargo_test --manifest-path=target/triangle_xform/Cargo.toml --release +diff --git a/third_party/rust/packed_simd/ci/runtest-android.rs b/third_party/rust/packed_simd/ci/runtest-android.rs +new file mode 100644 +index 000000000000..ed1cd80c834a +--- /dev/null 
++++ b/third_party/rust/packed_simd/ci/runtest-android.rs +@@ -0,0 +1,45 @@ ++use std::env; ++use std::process::Command; ++use std::path::{Path, PathBuf}; ++ ++fn main() { ++ let args = env::args_os() ++ .skip(1) ++ .filter(|arg| arg != "--quiet") ++ .collect::>(); ++ assert_eq!(args.len(), 1); ++ let test = PathBuf::from(&args[0]); ++ let dst = Path::new("/data/local/tmp").join(test.file_name().unwrap()); ++ ++ let status = Command::new("adb") ++ .arg("wait-for-device") ++ .status() ++ .expect("failed to run: adb wait-for-device"); ++ assert!(status.success()); ++ ++ let status = Command::new("adb") ++ .arg("push") ++ .arg(&test) ++ .arg(&dst) ++ .status() ++ .expect("failed to run: adb pushr"); ++ assert!(status.success()); ++ ++ let output = Command::new("adb") ++ .arg("shell") ++ .arg(&dst) ++ .output() ++ .expect("failed to run: adb shell"); ++ assert!(status.success()); ++ ++ println!("status: {}\nstdout ---\n{}\nstderr ---\n{}", ++ output.status, ++ String::from_utf8_lossy(&output.stdout), ++ String::from_utf8_lossy(&output.stderr)); ++ ++ let stdout = String::from_utf8_lossy(&output.stdout); ++ let mut lines = stdout.lines().filter(|l| l.starts_with("test result")); ++ if !lines.all(|l| l.contains("test result: ok") && l.contains("0 failed")) { ++ panic!("failed to find successful test run"); ++ } ++} +diff --git a/third_party/rust/packed_simd/ci/setup_benchmarks.sh b/third_party/rust/packed_simd/ci/setup_benchmarks.sh +new file mode 100644 +index 000000000000..ddc4765d5ceb +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/setup_benchmarks.sh +@@ -0,0 +1,10 @@ ++#!/usr/bin/env bash ++ ++set -ex ++ ++# Get latest ISPC binary for the target and put it in the path ++git clone https://github.com/gnzlbg/ispc-binaries ++cp ispc-binaries/ispc-${TARGET} ispc ++ ++# Rust-bindgen requires RUSTFMT ++rustup component add rustfmt-preview +diff --git a/third_party/rust/packed_simd/ci/test-runner-linux b/third_party/rust/packed_simd/ci/test-runner-linux +new file mode 
100644 +index 000000000000..0654f63bfdb9 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/test-runner-linux +@@ -0,0 +1,24 @@ ++#!/bin/sh ++ ++set -e ++ ++arch=$1 ++prog=$2 ++ ++cd /qemu/init ++cp -f $2 prog ++find . | cpio --create --format='newc' --quiet | gzip > ../initrd.gz ++cd .. ++ ++timeout 30s qemu-system-$arch \ ++ -m 1024 \ ++ -nographic \ ++ -kernel kernel \ ++ -initrd initrd.gz \ ++ -append init=/prog > output || true ++ ++# remove kernel messages ++tr -d '\r' < output | egrep -v '^\[' ++ ++# if the output contains a failure, return error ++! grep FAILED output > /dev/null +diff --git a/third_party/rust/packed_simd/contributing.md b/third_party/rust/packed_simd/contributing.md +new file mode 100644 +index 000000000000..93fa92783740 +--- /dev/null ++++ b/third_party/rust/packed_simd/contributing.md +@@ -0,0 +1,67 @@ ++# Contributing to `packed_simd` ++ ++Welcome! If you are reading this document, it means you are interested in contributing ++to the `packed_simd` crate. ++ ++## Reporting issues ++ ++All issues with this crate are tracked using GitHub's [Issue Tracker]. ++ ++You can use issues to bring bugs to the attention of the maintainers, to discuss ++certain problems encountered with the crate, or to request new features (although ++feature requests should be limited to things mentioned in the [RFC]). ++ ++One thing to keep in mind is to always use the **latest** nightly toolchain when ++working on this crate. Due to the nature of this project, we use a lot of unstable ++features, meaning breakage happens often. ++ ++[Issue Tracker]: https://github.com/rust-lang-nursery/packed_simd/issues ++[RFC]: https://github.com/rust-lang/rfcs/pull/2366 ++ ++### LLVM issues ++ ++The Rust compiler relies on [LLVM](https://llvm.org/) for machine code generation, ++and quite a few LLVM bugs have been discovered during the development of this project. 
++ ++If you encounter issues with incorrect/suboptimal codegen, which you do not encounter ++when using the [SIMD vendor intrinsics](https://doc.rust-lang.org/nightly/std/arch/), ++it is likely the issue is with LLVM, or this crate's interaction with it. ++ ++You should first open an issue **in this repo** to help us track the problem, and we ++will help determine what is the exact cause of the problem. ++If LLVM is indeed the cause, the issue will be reported upstream to the ++[LLVM bugtracker](https://bugs.llvm.org/). ++ ++## Submitting Pull Requests ++ ++New code is submitted to the crate using GitHub's [pull request] mechanism. ++You should first fork this repository, make your changes (preferrably in a new ++branch), then use GitHub's web UI to create a new PR. ++ ++[pull request]: https://help.github.com/articles/about-pull-requests/ ++ ++### Examples ++ ++The `examples` directory contains code showcasing SIMD code written with this crate, ++usually in comparison to scalar or ISPC code. If you have a project / idea which ++uses SIMD, we'd love to add it to the examples list. ++ ++Every example should include a small `README`, describing the example code's purpose. ++If your example could potentially work as a benchmark, then add a `benchmark.sh` ++script to allow running the example benchmark code in CI. See an existing example's ++[`benchmark.sh`](examples/aobench/benchmark.sh) for a sample. ++ ++Don't forget to update the crate's top-level `README` with a link to your example. ++ ++### Perf guide ++ ++The objective of the [performance guide][perf-guide] is to be a comprehensive ++resource detailing the process of optimizing Rust code with SIMD support. ++ ++If you believe a certain section could be reworded, or if you have any tips & tricks ++related to SIMD which you'd like to share, please open a PR. ++ ++[mdBook] is used to manage the formatting of the guide as a book. 
++ ++[perf-guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/ ++[mdBook]: https://github.com/rust-lang-nursery/mdBook +diff --git a/third_party/rust/packed_simd/perf-guide/.gitignore b/third_party/rust/packed_simd/perf-guide/.gitignore +new file mode 100644 +index 000000000000..5a0bf0317d75 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/.gitignore +@@ -0,0 +1 @@ ++/book +diff --git a/third_party/rust/packed_simd/perf-guide/book.toml b/third_party/rust/packed_simd/perf-guide/book.toml +new file mode 100644 +index 000000000000..69ba3053ca25 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/book.toml +@@ -0,0 +1,12 @@ ++[book] ++authors = ["Gonzalo Brito Gadeschi", "Gabriel Majeri"] ++multilingual = false ++src = "src" ++title = "Rust SIMD Performance Guide" ++description = "This book describes how to write performant SIMD code in Rust." ++ ++[build] ++create-missing = false ++ ++[output.html] ++additional-css = ["./src/ascii.css"] +diff --git a/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md b/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md +new file mode 100644 +index 000000000000..1e76898865c5 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md +@@ -0,0 +1,21 @@ ++# Summary ++ ++[Introduction](./introduction.md) ++ ++- [Floating-point Math](./float-math/fp.md) ++ - [Short-vector Math Library](./float-math/svml.md) ++ - [Approximate functions](./float-math/approx.md) ++ - [Fused multiply-accumulate](./float-math/fma.md) ++ ++- [Target features](./target-feature/features.md) ++ - [Using `RUSTFLAGS`](./target-feature/rustflags.md) ++ - [Using the `target_feature` attribute](./target-feature/attribute.md) ++ - [Interaction with inlining](./target-feature/inlining.md) ++ - [Detecting features at runtime](./target-feature/runtime.md) ++ ++- [Bounds checking](./bound_checks.md) ++- [Vertical and horizontal operations](./vert-hor-ops.md) ++ ++- [Performance profiling](./prof/profiling.md) ++ 
- [Profiling on Linux](./prof/linux.md) ++ - [Using machine code analyzers](./prof/mca.md) +diff --git a/third_party/rust/packed_simd/perf-guide/src/ascii.css b/third_party/rust/packed_simd/perf-guide/src/ascii.css +new file mode 100644 +index 000000000000..4c02651195f9 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/ascii.css +@@ -0,0 +1,4 @@ ++code { ++ /* "Source Code Pro" breaks ASCII art */ ++ font-family: Consolas, "Ubuntu Mono", Menlo, "DejaVu Sans Mono", monospace; ++} +diff --git a/third_party/rust/packed_simd/perf-guide/src/bound_checks.md b/third_party/rust/packed_simd/perf-guide/src/bound_checks.md +new file mode 100644 +index 000000000000..2eeedb5ac829 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/bound_checks.md +@@ -0,0 +1,22 @@ ++# Bounds checking ++ ++Reading and writing packed vectors to/from slices is checked by default. ++Independently of the configuration options used, the safe functions: ++ ++* `Simd<[T; N]>::from_slice_aligned(& s[..])` ++* `Simd<[T; N]>::write_to_slice_aligned(&mut s[..])` ++ ++always check that: ++ ++* the slice is big enough to hold the vector ++* the slice is suitably aligned to perform an aligned load/store for a `Simd<[T; ++ N]>` (this alignment is often much larger than that of `T`). ++ ++There are `_unaligned` versions that use unaligned load and stores, as well as ++`unsafe` `_unchecked` that do not perform any checks iff `debug-assertions = ++false` / `debug = false`. That is, the `_unchecked` methods do still assert size ++and alignment in debug builds and could also do so in release builds depending ++on the configuration options. ++ ++These assertions do often significantly impact performance and you should be ++aware of them. 
+diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md b/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md +new file mode 100644 +index 000000000000..2237c67ec4b3 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md +@@ -0,0 +1,8 @@ ++# Approximate functions ++ ++ +diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md b/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md +new file mode 100644 +index 000000000000..357748383d63 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md +@@ -0,0 +1,6 @@ ++# Fused Multiply Add ++ ++ +diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md b/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md +new file mode 100644 +index 000000000000..711fcc4fd598 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md +@@ -0,0 +1,3 @@ ++# Floating-point math ++ ++This chapter contains information pertaining to working with floating-point numbers. +diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md b/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md +new file mode 100644 +index 000000000000..266c2531cc04 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md +@@ -0,0 +1,7 @@ ++# Short Vector Math Library ++ ++ +diff --git a/third_party/rust/packed_simd/perf-guide/src/introduction.md b/third_party/rust/packed_simd/perf-guide/src/introduction.md +new file mode 100644 +index 000000000000..7243e19c8a54 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/introduction.md +@@ -0,0 +1,26 @@ ++# Introduction ++ ++## What is SIMD ++ ++ ++ ++## History of SIMD in Rust ++ ++ ++ ++## Discover packed_simd ++ ++ ++ ++Writing fast and portable SIMD algorithms using `packed_simd` is, unfortunately, ++not trivial. 
There are many pitfals that one should be aware of, and some idioms ++that help avoid those pitfalls. ++ ++This book attempts to document these best practices and provides practical examples ++on how to apply the tips to _your_ code. +diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/linux.md b/third_party/rust/packed_simd/perf-guide/src/prof/linux.md +new file mode 100644 +index 000000000000..96c7d67bc476 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/prof/linux.md +@@ -0,0 +1,107 @@ ++# Performance profiling on Linux ++ ++## Using `perf` ++ ++[perf](https://perf.wiki.kernel.org/) is the most powerful performance profiler ++for Linux, featuring support for various hardware Performance Monitoring Units, ++as well as integration with the kernel's performance events framework. ++ ++We will only look at how can the `perf` command can be used to profile SIMD code. ++Full system profiling is outside of the scope of this book. ++ ++### Recording ++ ++The first step is to record a program's execution during an average workload. ++It helps if you can isolate the parts of your program which have performance ++issues, and set up a benchmark which can be easily (re)run. ++ ++Build the benchmark binary in release mode, after having enabled debug info: ++ ++```sh ++$ cargo build --release ++Finished release [optimized + debuginfo] target(s) in 0.02s ++``` ++ ++Then use the `perf record` subcommand: ++ ++```sh ++$ perf record --call-graph=dwarf ./target/release/my-program ++[ perf record: Woken up 10 times to write data ] ++[ perf record: Captured and wrote 2,356 MB perf.data (292 samples) ] ++``` ++ ++Instead of using `--call-graph=dwarf`, which can become pretty slow, you can use ++`--call-graph=lbr` if you have a processor with support for Last Branch Record ++(i.e. Intel Haswell and newer). ++ ++`perf` will, by default, record the count of CPU cycles it takes to execute ++various parts of your program. 
You can use the `-e` command line option ++to enable other performance events, such as `cache-misses`. Use `perf list` ++to get a list of all hardware counters supported by your CPU. ++ ++### Viewing the report ++ ++The next step is getting a bird's eye view of the program's execution. ++`perf` provides a `ncurses`-based interface which will get you started. ++ ++Use `perf report` to open a visualization of your program's performance: ++ ++```sh ++perf report --hierarchy -M intel ++``` ++ ++`--hierarchy` will display a tree-like structure of where your program spent ++most of its time. `-M intel` enables disassembly output with Intel syntax, which ++is subjectively more readable than the default AT&T syntax. ++ ++Here is the output from profiling the `nbody` benchmark: ++ ++``` ++- 100,00% nbody ++ - 94,18% nbody ++ + 93,48% [.] nbody_lib::simd::advance ++ + 0,70% [.] nbody_lib::run ++ + 5,06% libc-2.28.so ++``` ++ ++If you move with the arrow keys to any node in the tree, you can the press `a` ++to have `perf` _annotate_ that node. This means it will: ++ ++- disassemble the function ++ ++- associate every instruction with the percentage of time which was spent executing it ++ ++- interleaves the disassembly with the source code, ++ assuming it found the debug symbols ++ (you can use `s` to toggle this behaviour) ++ ++`perf` will, by default, open the instruction which it identified as being the ++hottest spot in the function: ++ ++``` ++0,76 │ movapd xmm2,xmm0 ++0,38 │ movhlps xmm2,xmm0 ++ │ addpd xmm2,xmm0 ++ │ unpcklpd xmm1,xmm2 ++12,50 │ sqrtpd xmm0,xmm1 ++1,52 │ mulpd xmm0,xmm1 ++``` ++ ++In this case, `sqrtpd` will be highlighted in red, since that's the instruction ++which the CPU spends most of its time executing. ++ ++## Using Valgrind ++ ++Valgrind is a set of tools which initially helped C/C++ programmers find unsafe ++memory accesses in their code. 
Nowadays the project also has ++ ++- a heap profiler called `massif` ++ ++- a cache utilization profiler called `cachegrind` ++ ++- a call-graph performance profiler called `callgrind` ++ ++ +diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/mca.md b/third_party/rust/packed_simd/perf-guide/src/prof/mca.md +new file mode 100644 +index 000000000000..65ddf1a4eb3a +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/prof/mca.md +@@ -0,0 +1,100 @@ ++# Machine code analysis tools ++ ++## The microarchitecture of modern CPUs ++ ++While you might have heard of Instruction Set Architectures, such as `x86` or ++`arm` or `mips`, the term _microarchitecture_ (also written here as _µ-arch_), ++refers to the internal details of an actual family of CPUs, such as Intel's ++_Haswell_ or AMD's _Jaguar_. ++ ++Replacing scalar code with SIMD code will improve performance on all CPUs ++supporting the required vector extensions. ++However, due to microarchitectural differences, the actual speed-up at ++runtime might vary. ++ ++**Example**: a simple example arises when optimizing for AMD K8 CPUs. ++The assembly generated for an empty function should look like this: ++ ++```asm ++nop ++ret ++``` ++ ++The `nop` is used to align the `ret` instruction for better performance. ++However, the compiler will actually generate the following code: ++ ++```asm ++repz ret ++``` ++ ++The `repz` instruction will repeat the following instruction until a certain ++condition is met. Of course, in this situation, the function will simply immediately ++return, and the `ret` instruction is still aligned. ++However, AMD K8's branch predictor performs better with the latter code. ++ ++For those looking to absolutely maximize performance for a certain target µ-arch, ++you will have to read some CPU manuals, or ask the compiler to do it for you ++with `-C target-cpu`.
++ ++### Summary of CPU internals ++ ++Modern processors are able to execute instructions out-of-order for better performance, ++by utilizing tricks such as [branch prediction], [instruction pipelining], ++or [superscalar execution]. ++ ++[branch prediction]: https://en.wikipedia.org/wiki/Branch_predictor ++[instruction pipelining]: https://en.wikipedia.org/wiki/Instruction_pipelining ++[superscalar execution]: https://en.wikipedia.org/wiki/Superscalar_processor ++ ++SIMD instructions are also subject to these optimizations, meaning it can get pretty ++difficult to determine where the slowdown happens. ++For example, if the profiler reports a store operation is slow, one of two things ++could be happening: ++ ++- the store is limited by the CPU's memory bandwidth, which is actually an ideal ++ scenario, all things considered; ++ ++- memory bandwidth is nowhere near its peak, but the value to be stored is at the ++ end of a long chain of operations, and this store is where the profiler ++ encountered the pipeline stall; ++ ++Since most profilers are simple tools which don't understand the subtleties of ++instruction scheduling, you ++ ++## Analyzing the machine code ++ ++Certain tools have knowledge of internal CPU microarchitecture, i.e. they know ++ ++- how many physical [register files] a CPU actually has ++ ++- what is the latency / throughput of an instruction ++ ++- what [µ-ops] are generated for a set of instructions ++ ++and many other architectural details. ++ ++[register files]: https://en.wikipedia.org/wiki/Register_file ++[µ-ops]: https://en.wikipedia.org/wiki/Micro-operation ++ ++These tools are therefore able to provide accurate information as to why some ++instructions are inefficient, and where the bottleneck is. ++ ++The disadvantage is that the output of these tools requires advanced knowledge ++of the target architecture to understand, i.e. they **cannot** point out what ++the cause of the issue is explicitly.
++ ++## Intel's Architecture Code Analyzer (IACA) ++ ++[IACA] is a free tool offered by Intel for analyzing the performance of various ++computational kernels. ++ ++Being a proprietary, closed source tool, it _only_ supports Intel's µ-arches. ++ ++[IACA]: https://software.intel.com/en-us/articles/intel-architecture-code-analyzer ++ ++## llvm-mca ++ ++ +diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md b/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md +new file mode 100644 +index 000000000000..02ba78d2f22f +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md +@@ -0,0 +1,14 @@ ++# Performance profiling ++ ++While the rest of the book provides practical advice on how to improve the performance ++of SIMD code, this chapter is dedicated to [**performance profiling**][profiling]. ++Profiling consists of recording a program's execution in order to identify program ++hotspots. ++ ++**Important**: most profilers require debug information in order to accurately ++link the program hotspots back to the corresponding source code lines. Rust will ++disable debug info generation by default for optimized builds, but you can change ++that [in your `Cargo.toml`][cargo-ref]. 
++ ++[profiling]: https://en.wikipedia.org/wiki/Profiling_(computer_programming) ++[cargo-ref]: https://doc.rust-lang.org/cargo/reference/manifest.html#the-profile-sections +diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md +new file mode 100644 +index 000000000000..ee670fea5bd8 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md +@@ -0,0 +1,5 @@ ++# The `target_feature` attribute ++ ++ +diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md +new file mode 100644 +index 000000000000..b93030ca6708 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md +@@ -0,0 +1,13 @@ ++# Enabling target features ++ ++Not all processors of a certain architecture will have SIMD processing units, ++and using a SIMD instruction which is not supported will trigger undefined behavior. ++ ++To allow building safe, portable programs, the Rust compiler will **not**, by default, ++generate any sort of vector instructions, unless it can statically determine ++they are supported. For example, on AMD64, SSE2 support is architecturally guaranteed. ++The `x86_64-apple-darwin` target enables up to SSSE3. To get a definitive list of ++which features are enabled by default on various platforms, refer to the target ++specifications [in the compiler's source code][targets].
++ ++[targets]: https://github.com/rust-lang/rust/tree/master/src/librustc_target/spec +diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md +new file mode 100644 +index 000000000000..86705102a74b +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md +@@ -0,0 +1,5 @@ ++# Inlining ++ ++ +diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md +new file mode 100644 +index 000000000000..5b55c61c268a +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md +@@ -0,0 +1,31 @@ ++# Target features in practice ++ ++Using `RUSTFLAGS` will allow the crate being compiled, as well as all its ++transitive dependencies, to use certain target features. ++ ++A technique used to avoid undefined behavior at runtime is to compile and ++ship multiple binaries, each compiled with a certain set of features. ++This might not be feasible in some cases, and can quickly get out of hand ++as more and more vector extensions are added to an architecture. ++ ++Rust can be more flexible: you can build a single binary/library which automatically ++picks the best supported vector instructions depending on the host machine. ++The trick consists of monomorphizing parts of the code during building, and then ++using run-time feature detection to select the right code path when running. ++ ++ ++ ++**NOTE** (x86 specific): because the AVX (256-bit) registers extend the existing ++SSE (128-bit) registers, mixing SSE and AVX instructions in a program can cause ++performance issues. ++ ++The solution is to compile all code, even the code written with 128-bit vectors, ++with the AVX target feature enabled. This will cause the compiler to prefix the ++generated instructions with the [VEX] prefix.
++ ++[VEX]: https://en.wikipedia.org/wiki/VEX_prefix +diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md +new file mode 100644 +index 000000000000..47ddcc8660db +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md +@@ -0,0 +1,5 @@ ++# Detecting host features at runtime ++ ++ +diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md +new file mode 100644 +index 000000000000..e2e806e085b6 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md +@@ -0,0 +1,77 @@ ++# Using RUSTFLAGS ++ ++One of the easiest ways to benefit from SIMD is to allow the compiler ++to generate code using certain vector instruction extensions. ++ ++The environment variable `RUSTFLAGS` can be used to pass options for code ++generation to the Rust compiler. These flags will affect **all** compiled crates. ++ ++There are two flags which can be used to enable specific vector extensions: ++ ++## target-feature ++ ++- Syntax: `-C target-feature=` ++ ++- Provides the compiler with a comma-separated set of instruction extensions ++ to enable. ++ ++ **Example**: Use `-C target-feature=+sse3,+avx` to enable generating instructions ++ for [Streaming SIMD Extensions 3](https://en.wikipedia.org/wiki/SSE3) and ++ [Advanced Vector Extensions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions). ++ ++- To list target triples for all targets supported by Rust, use: ++ ++ ```sh ++ rustc --print target-list ++ ``` ++ ++- To list all supported target features for a certain target triple, use: ++ ++ ```sh ++ rustc --target=${TRIPLE} --print target-features ++ ``` ++ ++- Note that all CPU features are independent, and will have to be enabled individually.
++ ++ **Example**: Setting `-C target-feature=+avx2` will _not_ enable `fma`, even though ++ all CPUs which support AVX2 also support FMA. To enable both, one has to use ++ `-C target-feature=+avx2,+fma`. ++ ++- Some features also depend on other features, which need to be enabled for the ++ target instructions to be generated. ++ ++ **Example**: Unless `v7` is specified as the target CPU (see below), to enable ++ NEON on ARM it is necessary to use `-C target-feature=+v7,+neon`. ++ ++## target-cpu ++ ++- Syntax: `-C target-cpu=` ++ ++- Sets the identifier of a CPU family / model for which to build and optimize the code. ++ ++ **Example**: `RUSTFLAGS='-C target-cpu=cortex-a75'` ++ ++- To list all supported target CPUs for a certain target triple, use: ++ ++ ```sh ++ rustc --target=${TRIPLE} --print target-cpus ++ ``` ++ ++ **Example**: ++ ++ ```sh ++ rustc --target=i686-pc-windows-msvc --print target-cpus ++ ``` ++ ++- The compiler will translate this into a list of target features. Therefore, ++ individual feature checks (`#[cfg(target_feature = "...")]`) will still ++ work properly. ++ ++- It will cause the code generator to optimize the generated code for that ++ specific CPU model. ++ ++- Using `native` as the CPU model will cause Rust to generate and optimize code ++ for the CPU running the compiler. It is useful when building programs which you ++ plan to only use locally. This should never be used when the generated programs ++ are meant to be run on other computers, such as when packaging for distribution ++ or cross-compiling. +diff --git a/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md b/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md +new file mode 100644 +index 000000000000..d0dd1be12a19 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md +@@ -0,0 +1,76 @@ ++# Vertical and horizontal operations ++ ++In SIMD terminology, each vector has a certain "width" (number of lanes).
++A vector processor is able to perform two kinds of operations on a vector: ++ ++- Vertical operations: ++ operate on two vectors of the same width, result has same width ++ ++**Example**: vertical addition of two `f32x4` vectors ++ ++ %0 == | 2 | -3.5 | 0 | 7 | ++ + + + + ++ %1 == | 4 | 1.5 | -1 | 0 | ++ = = = = ++ %0 + %1 == | 6 | -2 | -1 | 7 | ++ ++- Horizontal operations: ++ reduce the elements of two vectors in some way, ++ the result's elements combine information from the two original ones ++ ++**Example**: horizontal addition of two `u64x2` vectors ++ ++ %0 == | 1 | 3 | ++ └─+───┘ ++ └───────┐ ++ │ ++ %1 == | 4 | -1 | │ ++ └─+──┘ │ ++ └───┐ │ ++ │ │ ++ ┌─────│───┘ ++ ▼ ▼ ++ %0 + %1 == | 4 | 3 | ++ ++## Performance consideration of horizontal operations ++ ++The result of vertical operations, like vector negation: `-a`, for a given lane, ++does not depend on the result of the operation for the other lanes. The result ++of horizontal operations, like the vector `sum` reduction: `a.sum()`, depends on ++the value of all vector lanes. ++ ++In virtually all architectures vertical operations are fast, while horizontal ++operations are, by comparison, very slow. ++ ++Consider the following two functions for computing the sum of all `f32` values ++in a slice: ++ ++```rust ++fn fast_sum(x: &[f32]) -> f32 { ++ assert!(x.len() % 4 == 0); ++ let mut sum = f32x4::splat(0.); // [0., 0., 0., 0.] ++ for i in (0..x.len()).step_by(4) { ++ sum += f32x4::from_slice_unaligned(&x[i..]); ++ } ++ sum.sum() ++} ++ ++fn slow_sum(x: &[f32]) -> f32 { ++ assert!(x.len() % 4 == 0); ++ let mut sum: f32 = 0.; ++ for i in (0..x.len()).step_by(4) { ++ sum += f32x4::from_slice_unaligned(&x[i..]).sum(); ++ } ++ sum ++} ++``` ++ ++The inner loop over the slice is where the bulk of the work actually happens. 
++There, the `fast_sum` function performs vertical operations into a vector, doing ++a single horizontal reduction at the end, while the `slow_sum` function performs ++horizontal vector operations inside of the loop. ++ ++On all widely-used architectures, `fast_sum` is a large constant factor faster ++than `slow_sum`. You can run the [slice_sum]() example and see for yourself. On ++the particular machine tested there the algorithm using the horizontal vector ++addition is 2.7x slower than the one using vertical vector operations! +diff --git a/third_party/rust/packed_simd/readme.md b/third_party/rust/packed_simd/readme.md +new file mode 100644 +index 000000000000..3b27a2bba0d6 +--- /dev/null ++++ b/third_party/rust/packed_simd/readme.md +@@ -0,0 +1,182 @@ ++# `Simd<[T; N]>` ++ ++## Implementation of [Rust RFC #2366: `std::simd`][rfc2366] ++ ++[![Travis-CI Status]][travis] [![Appveyor Status]][appveyor] [![Latest Version]][crates.io] [![docs]][master_docs] ++ ++> This aims to be a 100% conforming implementation of Rust RFC 2366 for stabilization. ++ ++**WARNING**: this crate only supports the most recent nightly Rust toolchain. ++ ++## Documentation ++ ++* [API docs (`master` branch)][master_docs] ++* [Performance guide][perf_guide] ++* [API docs (`docs.rs`)][docs.rs]: **CURRENTLY DOWN** due to ++ https://github.com/rust-lang-nursery/packed_simd/issues/110 ++* [RFC2366 `std::simd`][rfc2366]: - contains motivation, design rationale, ++ discussion, etc. ++ ++## Examples ++ ++Most of the examples come with both a scalar and a vectorized implementation.
++ ++* [`aobench`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench) ++* [`fannkuch_redux`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/fannkuch_redux) ++* [`matrix inverse`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/matrix_inverse) ++* [`mandelbrot`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/mandelbrot) ++* [`n-body`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/nbody) ++* [`options_pricing`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/options_pricing) ++* [`spectral_norm`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/spectral_norm) ++* [`triangle transform`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/triangle_xform) ++* [`stencil`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/stencil) ++* [`vector dot product`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/dot_product) ++ ++## Cargo features ++ ++* `into_bits` (default: disabled): enables `FromBits`/`IntoBits` trait ++ implementations for the vector types. These allow reinterpreting the bits of a ++ vector type as those of another vector type safely by just using the ++ `.into_bits()` method. ++ ++* `core_arch` (default: disabled): enable this feature to recompile `core::arch` ++ for the target-features enabled. `packed_simd` includes optimizations for some ++ target feature combinations that are enabled by this feature. Note, however, ++ that this is an unstable dependency, that rustc might break at any time. ++ ++* `sleef-sys` (default: disabled - `x86_64` only): internally uses the [SLEEF] ++ short-vector math library when profitable via the [`sleef-sys`][sleef_sys] ++ crate. [SLEEF] is licensed under the [Boost Software License ++ v1.0][boost_license], an extremely permissive license, and can be statically ++ linked without issues. 
++ ++## Performance ++ ++The following [ISPC] examples are also part of `packed_simd`'s ++[`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/) ++directory, where `packed_simd`+[`rayon`][rayon] are used to emulate [ISPC]'s ++Single-Program-Multiple-Data (SPMD) programming model. The performance results ++on different hardware are shown in the `readme.md` of each example. The following ++table summarizes the performance ranges, where `+` means speed-up and `-` ++slowdown: ++ ++* `aobench`: `[-1.02x, +1.53x]`, ++* `stencil`: `[+1.06x, +1.72x]`, ++* `mandelbrot`: `[-1.74x, +1.2x]`, ++* `options_pricing`: ++ * `black_scholes`: `+1.0x` ++ * `binomial_put`: `+1.4x` ++ ++ While SPMD is not the intended use case for `packed_simd`, it is possible to ++ combine the library with [`rayon`][rayon] to poorly emulate [ISPC]'s SPMD programming ++ model in Rust. Writing performant code is not as straightforward as with ++ [ISPC], but with some care (e.g. see the [Performance Guide][perf_guide]) one ++ can easily match and often out-perform [ISPC]'s "default performance". ++ ++## Platform support ++ ++The following table describes the supported platforms: `build` shows whether the ++library compiles without issues for a given target, while `run` shows whether ++the full testsuite passes on the target.
++ ++| Linux targets: | build | run | ++|-----------------------------------|-----------|---------| ++| `i586-unknown-linux-gnu` | ✓ | ✓ | ++| `i686-unknown-linux-gnu` | ✓ | ✓ | ++| `x86_64-unknown-linux-gnu` | ✓ | ✓ | ++| `arm-unknown-linux-gnueabi` | ✗ | ✗ | ++| `arm-unknown-linux-gnueabihf` | ✓ | ✓ | ++| `armv7-unknown-linux-gnueabi` | ✓ | ✓ | ++| `aarch64-unknown-linux-gnu` | ✓ | ✓ | ++| `mips-unknown-linux-gnu` | ✓ | ✓ | ++| `mipsel-unknown-linux-musl` | ✓ | ✓ | ++| `mips64-unknown-linux-gnuabi64` | ✓ | ✓ | ++| `mips64el-unknown-linux-gnuabi64` | ✓ | ✓ | ++| `powerpc-unknown-linux-gnu` | ✗ | ✗ | ++| `powerpc64-unknown-linux-gnu` | ✗ | ✗ | ++| `powerpc64le-unknown-linux-gnu` | ✗ | ✗ | ++| `s390x-unknown-linux-gnu` | ✓ | ✓* | ++| `sparc64-unknown-linux-gnu` | ✓ | ✓* | ++| `thumbv7neon-unknown-linux-gnueabihf` | ✓ | ✓ | ++| **MacOSX targets:** | **build** | **run** | ++| `x86_64-apple-darwin` | ✓ | ✓ | ++| `i686-apple-darwin` | ✓ | ✓ | ++| **Windows targets:** | **build** | **run** | ++| `x86_64-pc-windows-msvc` | ✓ | ✓ | ++| `i686-pc-windows-msvc` | ✓ | ✓ | ++| `x86_64-pc-windows-gnu` | ✗ | ✗ | ++| `i686-pc-windows-gnu` | ✗ | ✗ | ++| **WebAssembly targets:** | **build** | **run** | ++| `wasm32-unknown-unknown` | ✓ | ✓ | ++| **Android targets:** | **build** | **run** | ++| `x86_64-linux-android` | ✓ | ✓ | ++| `arm-linux-androideabi` | ✓ | ✓ | ++| `aarch64-linux-android` | ✓ | ✗ | ++| `thumbv7neon-linux-androideabi` | ✓ | ✓ | ++| **iOS targets:** | **build** | **run** | ++| `i386-apple-ios` | ✓ | ✗ | ++| `x86_64-apple-ios` | ✓ | ✗ | ++| `armv7-apple-ios` | ✓ | ✗** | ++| `aarch64-apple-ios` | ✓ | ✗** | ++| **xBSD targets:** | **build** | **run** | ++| `i686-unknown-freebsd` | ✗ | ✗** | ++| `x86_64-unknown-freebsd` | ✗ | ✗** | ++| `x86_64-unknown-netbsd` | ✗ | ✗** | ++| **Solaris targets:** | **build** | **run** | ++| `x86_64-sun-solaris` | ✗ | ✗** | ++ ++[*] most of the test suite passes correctly on these platform but ++there are correctness bugs open in the issue 
tracker. ++ ++[**] it is currently not easily possible to run these platforms on CI. ++ ++## Machine code verification ++ ++The ++[`verify/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/verify) ++crate tests disassembles the portable packed vector APIs at run-time and ++compares the generated machine code against the desired one to make sure that ++this crate remains efficient. ++ ++## License ++ ++This project is licensed under either of ++ ++* [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) ++ ([LICENSE-APACHE](LICENSE-APACHE)) ++ ++* [MIT License](http://opensource.org/licenses/MIT) ++ ([LICENSE-MIT](LICENSE-MIT)) ++ ++at your option. ++ ++## Contributing ++ ++We welcome all people who want to contribute. ++Please see the [contributing instructions] for more information. ++ ++Contributions in any form (issues, pull requests, etc.) to this project ++must adhere to Rust's [Code of Conduct]. ++ ++Unless you explicitly state otherwise, any contribution intentionally submitted ++for inclusion in `packed_simd` by you, as defined in the Apache-2.0 license, shall be ++dual licensed as above, without any additional terms or conditions. 
++ ++[travis]: https://travis-ci.org/rust-lang-nursery/packed_simd ++[Travis-CI Status]: https://travis-ci.org/rust-lang-nursery/packed_simd.svg?branch=master ++[appveyor]: https://ci.appveyor.com/project/gnzlbg/packed-simd ++[Appveyor Status]: https://ci.appveyor.com/api/projects/status/hd7v9dvr442hgdix?svg=true ++[Latest Version]: https://img.shields.io/crates/v/packed_simd.svg ++[crates.io]: https://crates.io/crates/packed_simd ++[docs]: https://docs.rs/packed_simd/badge.svg ++[docs.rs]: https://docs.rs/packed_simd/ ++[master_docs]: https://rust-lang-nursery.github.io/packed_simd/packed_simd/ ++[perf_guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/ ++[rfc2366]: https://github.com/rust-lang/rfcs/pull/2366 ++[ISPC]: https://ispc.github.io/ ++[rayon]: https://crates.io/crates/rayon ++[boost_license]: https://www.boost.org/LICENSE_1_0.txt ++[SLEEF]: https://sleef.org/ ++[sleef_sys]: https://crates.io/crates/sleef-sys ++[contributing instructions]: contributing.md ++[Code of Conduct]: https://www.rust-lang.org/en-US/conduct.html +diff --git a/third_party/rust/packed_simd/rustfmt.toml b/third_party/rust/packed_simd/rustfmt.toml +new file mode 100644 +index 000000000000..5b400a4ce440 +--- /dev/null ++++ b/third_party/rust/packed_simd/rustfmt.toml +@@ -0,0 +1,7 @@ ++max_width = 79 ++use_small_heuristics = "Max" ++wrap_comments = true ++comment_width = 79 ++fn_args_density = "Compressed" ++edition = "2018" ++error_on_line_overflow = true +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/src/api.rs b/third_party/rust/packed_simd/src/api.rs +new file mode 100644 +index 000000000000..9959a052ae96 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api.rs +@@ -0,0 +1,301 @@ ++//! 
Implements the Simd<[T; N]> APIs ++ ++crate mod cast; ++#[macro_use] ++mod cmp; ++#[macro_use] ++mod default; ++#[macro_use] ++mod fmt; ++#[macro_use] ++mod from; ++#[macro_use] ++mod hash; ++#[macro_use] ++mod math; ++#[macro_use] ++mod minimal; ++#[macro_use] ++mod ops; ++#[macro_use] ++mod ptr; ++#[macro_use] ++mod reductions; ++#[macro_use] ++mod select; ++#[macro_use] ++mod shuffle; ++#[macro_use] ++mod shuffle1_dyn; ++#[macro_use] ++mod slice; ++#[macro_use] ++mod swap_bytes; ++#[macro_use] ++mod bit_manip; ++ ++#[cfg(feature = "into_bits")] ++crate mod into_bits; ++ ++macro_rules! impl_i { ++ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident ++ | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),* ++ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { ++ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | $($elem_ids),* | $(#[$doc])*); ++ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) ++ ); ++ impl_ops_scalar_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) ++ ); ++ impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_int_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt ++ ); ++ impl_reduction_integer_arithmetic!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ ); ++ impl_reduction_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ ); ++ impl_reduction_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0) ++ ); ++ impl_fmt_debug!([$elem_ty; 
$elem_n]: $tuple_id | $test_tt); ++ impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1)); ++ impl_from_vectors!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* ++ ); ++ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_partial_eq!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1) ++ ); ++ impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); ++ impl_cmp_vertical!( ++ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt ++ ); ++ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); ++ ++ test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt); ++ test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ } ++} ++ ++macro_rules! 
impl_u { ++ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident ++ | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),* ++ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { ++ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | $($elem_ids),* | $(#[$doc])*); ++ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) ++ ); ++ impl_ops_scalar_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) ++ ); ++ impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_int_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt ++ ); ++ impl_reduction_integer_arithmetic!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ ); ++ impl_reduction_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ ); ++ impl_reduction_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0) ++ ); ++ impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1)); ++ impl_from_vectors!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* ++ ); ++ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ 
impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_partial_eq!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 0) ++ ); ++ impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); ++ impl_cmp_vertical!( ++ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt ++ ); ++ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); ++ ++ test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt); ++ test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ } ++} ++ ++macro_rules! impl_f { ++ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident ++ | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),* ++ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { ++ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | $($elem_ids),* | $(#[$doc])*); ++ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_float_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt ++ ); ++ impl_reduction_float_arithmetic!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_reduction_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ ); ++ impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 1.)); ++ impl_from_vectors!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* ++ ); ++ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_partial_eq!( ++ 
[$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 0.) ++ ); ++ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ ++ impl_float_consts!([$elem_ty; $elem_n]: $tuple_id); ++ impl_float_category!([$elem_ty; $elem_n]: $tuple_id, $mask_ty); ++ ++ // floating-point math ++ impl_math_float_abs!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_cos!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_exp!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_ln!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_mul_add!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_mul_adde!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_powf!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_recpre!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_rsqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_sin!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_sqrt!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_sqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_vertical!( ++ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1., 0.) ++ | $test_tt ++ ); ++ ++ test_select!($elem_ty, $mask_ty, $tuple_id, (1., 2.) | $test_tt); ++ test_reduction_float_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt ++ ); ++ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ } ++} ++ ++macro_rules! 
impl_m { ++ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident | $ielem_ty:ident ++ | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),* ++ | $(#[$doc:meta])*) => { ++ impl_minimal_mask!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | $($elem_ids),* | $(#[$doc])* ++ ); ++ impl_ops_vector_mask_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) ++ ); ++ impl_ops_scalar_mask_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) ++ ); ++ impl_reduction_bitwise!( ++ [bool; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | (|x|{ x != 0 }) | (true, false) ++ ); ++ impl_reduction_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_debug!([bool; $elem_n]: $tuple_id | $test_tt); ++ impl_from_array!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt ++ | (crate::$elem_ty::new(true), true) ++ ); ++ impl_from_vectors!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* ++ ); ++ impl_default!([bool; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_partial_eq!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) ++ ); ++ impl_cmp_eq!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) ++ ); ++ impl_cmp_vertical!( ++ [$elem_ty; $elem_n]: $tuple_id, $tuple_id, true, (true, false) ++ | $test_tt ++ ); ++ impl_select!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_ord!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (false, true) ++ ); ++ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ ++ test_cmp_partial_ord_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ test_shuffle1_dyn_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ } ++} ++ ++macro_rules! 
impl_const_p { ++ ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident, ++ $usize_ty:ident, $isize_ty:ident ++ | $test_tt:tt | $($elem_ids:ident),* ++ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { ++ impl_minimal_p!( ++ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty ++ | ref_ | $test_tt | $($elem_ids),* ++ | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])* ++ ); ++ impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); ++ } ++} ++ ++macro_rules! impl_mut_p { ++ ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident, ++ $usize_ty:ident, $isize_ty:ident ++ | $test_tt:tt | $($elem_ids:ident),* ++ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { ++ impl_minimal_p!( ++ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty ++ | ref_mut_ | $test_tt | $($elem_ids),* ++ | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])* ++ ); ++ impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); ++ impl_ptr_write!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/api/bit_manip.rs b/third_party/rust/packed_simd/src/api/bit_manip.rs +new file mode 100644 +index 000000000000..3d3c4eb8850a +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/bit_manip.rs +@@ -0,0 +1,128 @@ ++//! Bit manipulations. ++ ++macro_rules! impl_bit_manip { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Returns the number of ones in the binary representation of ++ /// the lanes of `self`. ++ #[inline] ++ pub fn count_ones(self) -> Self { ++ super::codegen::bit_manip::BitManip::ctpop(self) ++ } ++ ++ /// Returns the number of zeros in the binary representation of ++ /// the lanes of `self`. ++ #[inline] ++ pub fn count_zeros(self) -> Self { ++ super::codegen::bit_manip::BitManip::ctpop(!self) ++ } ++ ++ /// Returns the number of leading zeros in the binary ++ /// representation of the lanes of `self`. 
++ #[inline] ++ pub fn leading_zeros(self) -> Self { ++ super::codegen::bit_manip::BitManip::ctlz(self) ++ } ++ ++ /// Returns the number of trailing zeros in the binary ++ /// representation of the lanes of `self`. ++ #[inline] ++ pub fn trailing_zeros(self) -> Self { ++ super::codegen::bit_manip::BitManip::cttz(self) ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item_with_macros! { ++ #[allow(overflowing_literals)] ++ pub mod [<$id _bit_manip>] { ++ use super::*; ++ ++ const LANE_WIDTH: usize = mem::size_of::<$elem_ty>() * 8; ++ ++ macro_rules! test_func { ++ ($x:expr, $func:ident) => {{ ++ let mut actual = $x; ++ for i in 0..$id::lanes() { ++ actual = actual.replace( ++ i, ++ $x.extract(i).$func() as $elem_ty ++ ); ++ } ++ let expected = $x.$func(); ++ assert_eq!(actual, expected); ++ }}; ++ } ++ ++ const BYTES: [u8; 64] = [ ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ ]; ++ ++ fn load_bytes() -> $id { ++ let elems: &mut [$elem_ty] = unsafe { ++ slice::from_raw_parts_mut( ++ BYTES.as_mut_ptr() as *mut $elem_ty, ++ $id::lanes(), ++ ) ++ }; ++ $id::from_slice_unaligned(elems) ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn count_ones() { ++ test_func!($id::splat(0), count_ones); ++ test_func!($id::splat(!0), count_ones); ++ test_func!(load_bytes(), count_ones); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn count_zeros() { ++ test_func!($id::splat(0), count_zeros); ++ test_func!($id::splat(!0), count_zeros); ++ test_func!(load_bytes(), count_zeros); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn leading_zeros() { ++ 
test_func!($id::splat(0), leading_zeros); ++ test_func!($id::splat(1), leading_zeros); ++ // some implementations use `pshufb` which has unique ++ // behavior when the 8th bit is set. ++ test_func!($id::splat(0b1000_0010), leading_zeros); ++ test_func!($id::splat(!0), leading_zeros); ++ test_func!( ++ $id::splat(1 << (LANE_WIDTH - 1)), ++ leading_zeros ++ ); ++ test_func!(load_bytes(), leading_zeros); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn trailing_zeros() { ++ test_func!($id::splat(0), trailing_zeros); ++ test_func!($id::splat(1), trailing_zeros); ++ test_func!($id::splat(0b1000_0010), trailing_zeros); ++ test_func!($id::splat(!0), trailing_zeros); ++ test_func!( ++ $id::splat(1 << (LANE_WIDTH - 1)), ++ trailing_zeros ++ ); ++ test_func!(load_bytes(), trailing_zeros); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/cast.rs b/third_party/rust/packed_simd/src/api/cast.rs +new file mode 100644 +index 000000000000..f1c32ca1a38b +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast.rs +@@ -0,0 +1,108 @@ ++//! Implementation of `FromCast` and `IntoCast`. ++#![allow(clippy::module_name_repetitions)] ++ ++/// Numeric cast from `T` to `Self`. ++/// ++/// > Note: This is a temporary workaround until the conversion traits ++/// specified > in [RFC2484] are implemented. ++/// ++/// Numeric cast between vectors with the same number of lanes, such that: ++/// ++/// * casting integer vectors whose lane types have the same size (e.g. `i32xN` ++/// -> `u32xN`) is a **no-op**, ++/// ++/// * casting from a larger integer to a smaller integer (e.g. `u32xN` -> ++/// `u8xN`) will **truncate**, ++/// ++/// * casting from a smaller integer to a larger integer (e.g. 
`u8xN` -> ++/// `u32xN`) will: ++/// * **zero-extend** if the source is unsigned, or ++/// * **sign-extend** if the source is signed, ++/// ++/// * casting from a float to an integer will **round the float towards zero**, ++/// ++/// * casting from an integer to float will produce the floating point ++/// representation of the integer, **rounding to nearest, ties to even**, ++/// ++/// * casting from an `f32` to an `f64` is perfect and lossless, ++/// ++/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. ++/// ++/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484 ++pub trait FromCast: crate::marker::Sized { ++ /// Numeric cast from `T` to `Self`. ++ fn from_cast(_: T) -> Self; ++} ++ ++/// Numeric cast from `Self` to `T`. ++/// ++/// > Note: This is a temporary workaround until the conversion traits ++/// specified > in [RFC2484] are implemented. ++/// ++/// Numeric cast between vectors with the same number of lanes, such that: ++/// ++/// * casting integer vectors whose lane types have the same size (e.g. `i32xN` ++/// -> `u32xN`) is a **no-op**, ++/// ++/// * casting from a larger integer to a smaller integer (e.g. `u32xN` -> ++/// `u8xN`) will **truncate**, ++/// ++/// * casting from a smaller integer to a larger integer (e.g. `u8xN` -> ++/// `u32xN`) will: ++/// * **zero-extend** if the source is unsigned, or ++/// * **sign-extend** if the source is signed, ++/// ++/// * casting from a float to an integer will **round the float towards zero**, ++/// ++/// * casting from an integer to float will produce the floating point ++/// representation of the integer, **rounding to nearest, ties to even**, ++/// ++/// * casting from an `f32` to an `f64` is perfect and lossless, ++/// ++/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. ++/// ++/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484 ++pub trait Cast: crate::marker::Sized { ++ /// Numeric cast from `self` to `T`. 
++ fn cast(self) -> T; ++} ++ ++/// `FromCast` implies `Cast`. ++impl Cast for T ++where ++ U: FromCast, ++{ ++ #[inline] ++ fn cast(self) -> U { ++ U::from_cast(self) ++ } ++} ++ ++/// `FromCast` and `Cast` are reflexive ++impl FromCast for T { ++ #[inline] ++ fn from_cast(t: Self) -> Self { ++ t ++ } ++} ++ ++#[macro_use] ++mod macros; ++ ++mod v16; ++pub use self::v16::*; ++ ++mod v32; ++pub use self::v32::*; ++ ++mod v64; ++pub use self::v64::*; ++ ++mod v128; ++pub use self::v128::*; ++ ++mod v256; ++pub use self::v256::*; ++ ++mod v512; ++pub use self::v512::*; +diff --git a/third_party/rust/packed_simd/src/api/cast/macros.rs b/third_party/rust/packed_simd/src/api/cast/macros.rs +new file mode 100644 +index 000000000000..3bb29f0b80b7 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast/macros.rs +@@ -0,0 +1,82 @@ ++//! Macros implementing `FromCast` ++ ++macro_rules! impl_from_cast_ { ++ ($id:ident[$test_tt:tt]: $from_ty:ident) => { ++ impl crate::api::cast::FromCast<$from_ty> for $id { ++ #[inline] ++ fn from_cast(x: $from_ty) -> Self { ++ use crate::llvm::simd_cast; ++ debug_assert_eq!($from_ty::lanes(), $id::lanes()); ++ Simd(unsafe { simd_cast(x.0) }) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _from_cast_ $from_ty>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn test() { ++ assert_eq!($id::lanes(), $from_ty::lanes()); ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! impl_from_cast { ++ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { ++ $( ++ impl_from_cast_!($id[$test_tt]: $from_ty); ++ )* ++ } ++} ++ ++macro_rules! 
impl_from_cast_mask_ { ++ ($id:ident[$test_tt:tt]: $from_ty:ident) => { ++ impl crate::api::cast::FromCast<$from_ty> for $id { ++ #[inline] ++ fn from_cast(x: $from_ty) -> Self { ++ debug_assert_eq!($from_ty::lanes(), $id::lanes()); ++ x.ne($from_ty::default()) ++ .select($id::splat(true), $id::splat(false)) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _from_cast_ $from_ty>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn test() { ++ assert_eq!($id::lanes(), $from_ty::lanes()); ++ ++ let x = $from_ty::default(); ++ let m: $id = x.cast(); ++ assert!(m.none()); ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! impl_from_cast_mask { ++ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { ++ $( ++ impl_from_cast_mask_!($id[$test_tt]: $from_ty); ++ )* ++ } ++} ++ ++#[allow(unused)] ++macro_rules! impl_into_cast { ++ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { ++ $( ++ impl_from_cast_!($from_ty[$test_tt]: $id); ++ )* ++ } ++} +diff --git a/third_party/rust/packed_simd/src/api/cast/v128.rs b/third_party/rust/packed_simd/src/api/cast/v128.rs +new file mode 100644 +index 000000000000..78c07f3a5597 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast/v128.rs +@@ -0,0 +1,79 @@ ++//! 
`FromCast` and `IntoCast` implementations for portable 128-bit wide vectors ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_from_cast!( ++ i8x16[test_v128]: u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 ++); ++impl_from_cast!( ++ u8x16[test_v128]: i8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 ++); ++impl_from_cast_mask!( ++ m8x16[test_v128]: i8x16, u8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 ++); ++ ++impl_from_cast!( ++ i16x8[test_v128]: i8x8, u8x8, m8x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast!( ++ u16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast_mask!( ++ m16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, u16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++ ++impl_from_cast!( ++ i32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast!( ++ u32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast!( ++ f32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast_mask!( ++ m32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++ ++impl_from_cast!( ++ i64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast!( ++ u64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, 
m32x2, ++ i64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast!( ++ f64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast_mask!( ++ m64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++ ++impl_from_cast!( ++ isizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, usizex2, msizex2 ++); ++impl_from_cast!( ++ usizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, msizex2 ++); ++impl_from_cast_mask!( ++ msizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2 ++); ++ ++// FIXME[test_v128]: 64-bit single element vectors into_cast impls ++impl_from_cast!(i128x1[test_v128]: u128x1, m128x1); ++impl_from_cast!(u128x1[test_v128]: i128x1, m128x1); ++impl_from_cast!(m128x1[test_v128]: i128x1, u128x1); +diff --git a/third_party/rust/packed_simd/src/api/cast/v16.rs b/third_party/rust/packed_simd/src/api/cast/v16.rs +new file mode 100644 +index 000000000000..d292936baa41 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast/v16.rs +@@ -0,0 +1,17 @@ ++//! 
`FromCast` and `IntoCast` implementations for portable 16-bit wide vectors ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_from_cast!( ++ i8x2[test_v16]: u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast!( ++ u8x2[test_v16]: i8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast_mask!( ++ m8x2[test_v16]: i8x2, u8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); +diff --git a/third_party/rust/packed_simd/src/api/cast/v256.rs b/third_party/rust/packed_simd/src/api/cast/v256.rs +new file mode 100644 +index 000000000000..0a669e0beebe +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast/v256.rs +@@ -0,0 +1,81 @@ ++//! `FromCast` and `IntoCast` implementations for portable 256-bit wide vectors ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_from_cast!(i8x32[test_v256]: u8x32, m8x32, i16x32, u16x32, m16x32); ++impl_from_cast!(u8x32[test_v256]: i8x32, m8x32, i16x32, u16x32, m16x32); ++impl_from_cast_mask!(m8x32[test_v256]: i8x32, u8x32, i16x32, u16x32, m16x32); ++ ++impl_from_cast!( ++ i16x16[test_v256]: i8x16, u8x16, m8x16, u16x16, m16x16, ++ i32x16, u32x16, f32x16, m32x16 ++); ++impl_from_cast!( ++ u16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, m16x16, ++ i32x16, u32x16, f32x16, m32x16 ++); ++impl_from_cast_mask!( ++ m16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, u16x16, ++ i32x16, u32x16, f32x16, m32x16 ++); ++ ++impl_from_cast!( ++ i32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast!( ++ u32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast!( 
++ f32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast_mask!( ++ m32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++ ++impl_from_cast!( ++ i64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast!( ++ u64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast!( ++ f64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast_mask!( ++ m64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++ ++impl_from_cast!( ++ i128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast!( ++ u128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast_mask!( ++ m128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, m64x2, f64x2, i128x2, u128x2, isizex2, usizex2, msizex2 ++); ++ ++impl_from_cast!( ++ isizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, usizex4, msizex4 ++); ++impl_from_cast!( ++ usizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, msizex4 
++); ++impl_from_cast_mask!( ++ msizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4 ++); +diff --git a/third_party/rust/packed_simd/src/api/cast/v32.rs b/third_party/rust/packed_simd/src/api/cast/v32.rs +new file mode 100644 +index 000000000000..65050cdacb4e +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast/v32.rs +@@ -0,0 +1,30 @@ ++//! `FromCast` and `IntoCast` implementations for portable 32-bit wide vectors ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_from_cast!( ++ i8x4[test_v32]: u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast!( ++ u8x4[test_v32]: i8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast_mask!( ++ m8x4[test_v32]: i8x4, u8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++ ++impl_from_cast!( ++ i16x2[test_v32]: i8x2, u8x2, m8x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast!( ++ u16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast_mask!( ++ m16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, u16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); +diff --git a/third_party/rust/packed_simd/src/api/cast/v512.rs b/third_party/rust/packed_simd/src/api/cast/v512.rs +new file mode 100644 +index 000000000000..9ae1caed35e2 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast/v512.rs +@@ -0,0 +1,68 @@ ++//! 
`FromCast` and `IntoCast` implementations for portable 512-bit wide vectors ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_from_cast!(i8x64[test_v512]: u8x64, m8x64); ++impl_from_cast!(u8x64[test_v512]: i8x64, m8x64); ++impl_from_cast_mask!(m8x64[test_v512]: i8x64, u8x64); ++ ++impl_from_cast!(i16x32[test_v512]: i8x32, u8x32, m8x32, u16x32, m16x32); ++impl_from_cast!(u16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, m16x32); ++impl_from_cast_mask!(m16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, u16x32); ++ ++impl_from_cast!( ++ i32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, u32x16, f32x16, m32x16 ++); ++impl_from_cast!( ++ u32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, f32x16, m32x16 ++); ++impl_from_cast!( ++ f32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, m32x16 ++); ++impl_from_cast_mask!( ++ m32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16 ++); ++ ++impl_from_cast!( ++ i64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast!( ++ u64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast!( ++ f64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast_mask!( ++ m64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, isizex8, usizex8, msizex8 ++); ++ ++impl_from_cast!( ++ i128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast!( ++ u128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, m128x4, isizex4, usizex4, msizex4 
++); ++impl_from_cast_mask!( ++ m128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, m64x4, f64x4, i128x4, u128x4, isizex4, usizex4, msizex4 ++); ++ ++impl_from_cast!( ++ isizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, usizex8, msizex8 ++); ++impl_from_cast!( ++ usizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, msizex8 ++); ++impl_from_cast_mask!( ++ msizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8 ++); +diff --git a/third_party/rust/packed_simd/src/api/cast/v64.rs b/third_party/rust/packed_simd/src/api/cast/v64.rs +new file mode 100644 +index 000000000000..0e2f78f7335b +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast/v64.rs +@@ -0,0 +1,47 @@ ++//! `FromCast` and `IntoCast` implementations for portable 64-bit wide vectors ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_from_cast!( ++ i8x8[test_v64]: u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast!( ++ u8x8[test_v64]: i8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast_mask!( ++ m8x8[test_v64]: i8x8, u8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++ ++impl_from_cast!( ++ i16x4[test_v64]: i8x4, u8x4, m8x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast!( ++ u16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast_mask!( ++ m16x4[test_v64]: i8x4, u8x4, m8x4, 
i16x4, u16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++ ++impl_from_cast!( ++ i32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast!( ++ u32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast!( ++ f32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast_mask!( ++ m32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); +diff --git a/third_party/rust/packed_simd/src/api/cmp.rs b/third_party/rust/packed_simd/src/api/cmp.rs +new file mode 100644 +index 000000000000..6d5301ddddbd +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cmp.rs +@@ -0,0 +1,16 @@ ++//! Implement cmp traits for vector types ++ ++#[macro_use] ++mod partial_eq; ++ ++#[macro_use] ++mod eq; ++ ++#[macro_use] ++mod partial_ord; ++ ++#[macro_use] ++mod ord; ++ ++#[macro_use] ++mod vertical; +diff --git a/third_party/rust/packed_simd/src/api/cmp/eq.rs b/third_party/rust/packed_simd/src/api/cmp/eq.rs +new file mode 100644 +index 000000000000..3c55d0dce57e +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cmp/eq.rs +@@ -0,0 +1,27 @@ ++//! Implements `Eq` for vector types. ++ ++macro_rules! impl_cmp_eq { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident | $test_tt:tt | ++ ($true:expr, $false:expr) ++ ) => { ++ impl crate::cmp::Eq for $id {} ++ impl crate::cmp::Eq for LexicographicallyOrdered<$id> {} ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _cmp_eq>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn eq() { ++ fn foo(_: E) {} ++ let a = $id::splat($false); ++ foo(a); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/cmp/ord.rs b/third_party/rust/packed_simd/src/api/cmp/ord.rs +new file mode 100644 +index 000000000000..e54ba3bfde9a +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cmp/ord.rs +@@ -0,0 +1,43 @@ ++//! Implements `Ord` for vector types. ++ ++macro_rules! impl_cmp_ord { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident | $test_tt:tt | ++ ($true:expr, $false:expr) ++ ) => { ++ impl $id { ++ /// Returns a wrapper that implements `Ord`. ++ #[inline] ++ pub fn lex_ord(&self) -> LexicographicallyOrdered<$id> { ++ LexicographicallyOrdered(*self) ++ } ++ } ++ ++ impl crate::cmp::Ord for LexicographicallyOrdered<$id> { ++ #[inline] ++ fn cmp(&self, other: &Self) -> crate::cmp::Ordering { ++ match self.partial_cmp(other) { ++ Some(x) => x, ++ None => unsafe { crate::hint::unreachable_unchecked() }, ++ } ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _cmp_ord>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn eq() { ++ fn foo(_: E) {} ++ let a = $id::splat($false); ++ foo(a.partial_lex_ord()); ++ foo(a.lex_ord()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs +new file mode 100644 +index 000000000000..1712a0de56cb +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs +@@ -0,0 +1,67 @@ ++//! Implements `PartialEq` for vector types. ++ ++macro_rules! 
impl_cmp_partial_eq { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident | $test_tt:tt | ++ ($true:expr, $false:expr) ++ ) => { ++ // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 ++ #[allow(clippy::partialeq_ne_impl)] ++ impl crate::cmp::PartialEq<$id> for $id { ++ #[inline] ++ fn eq(&self, other: &Self) -> bool { ++ $id::eq(*self, *other).all() ++ } ++ #[inline] ++ fn ne(&self, other: &Self) -> bool { ++ $id::ne(*self, *other).any() ++ } ++ } ++ ++ // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 ++ #[allow(clippy::partialeq_ne_impl)] ++ impl crate::cmp::PartialEq> ++ for LexicographicallyOrdered<$id> ++ { ++ #[inline] ++ fn eq(&self, other: &Self) -> bool { ++ self.0 == other.0 ++ } ++ #[inline] ++ fn ne(&self, other: &Self) -> bool { ++ self.0 != other.0 ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _cmp_PartialEq>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn partial_eq() { ++ let a = $id::splat($false); ++ let b = $id::splat($true); ++ ++ assert!(a != b); ++ assert!(!(a == b)); ++ assert!(a == a); ++ assert!(!(a != a)); ++ ++ if $id::lanes() > 1 { ++ let a = $id::splat($false).replace(0, $true); ++ let b = $id::splat($true); ++ ++ assert!(a != b); ++ assert!(!(a == b)); ++ assert!(a == a); ++ assert!(!(a != a)); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs +new file mode 100644 +index 000000000000..a2292918bae1 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs +@@ -0,0 +1,234 @@ ++//! Implements `PartialOrd` for vector types. ++//! ++//! This implements a lexicographical order. ++ ++macro_rules! 
impl_cmp_partial_ord { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Returns a wrapper that implements `PartialOrd`. ++ #[inline] ++ pub fn partial_lex_ord(&self) -> LexicographicallyOrdered<$id> { ++ LexicographicallyOrdered(*self) ++ } ++ } ++ ++ impl crate::cmp::PartialOrd> ++ for LexicographicallyOrdered<$id> ++ { ++ #[inline] ++ fn partial_cmp( ++ &self, other: &Self, ++ ) -> Option { ++ if PartialEq::eq(self, other) { ++ Some(crate::cmp::Ordering::Equal) ++ } else if PartialOrd::lt(self, other) { ++ Some(crate::cmp::Ordering::Less) ++ } else if PartialOrd::gt(self, other) { ++ Some(crate::cmp::Ordering::Greater) ++ } else { ++ None ++ } ++ } ++ #[inline] ++ fn lt(&self, other: &Self) -> bool { ++ let m_lt = self.0.lt(other.0); ++ let m_eq = self.0.eq(other.0); ++ for i in 0..$id::lanes() { ++ if m_eq.extract(i) { ++ continue; ++ } ++ return m_lt.extract(i); ++ } ++ false ++ } ++ #[inline] ++ fn le(&self, other: &Self) -> bool { ++ self.lt(other) | PartialEq::eq(self, other) ++ } ++ #[inline] ++ fn ge(&self, other: &Self) -> bool { ++ self.gt(other) | PartialEq::eq(self, other) ++ } ++ #[inline] ++ fn gt(&self, other: &Self) -> bool { ++ let m_gt = self.0.gt(other.0); ++ let m_eq = self.0.eq(other.0); ++ for i in 0..$id::lanes() { ++ if m_eq.extract(i) { ++ continue; ++ } ++ return m_gt.extract(i); ++ } ++ false ++ } ++ } ++ }; ++} ++ ++macro_rules! test_cmp_partial_ord_int { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _cmp_PartialOrd>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn partial_lex_ord() { ++ use crate::testing::utils::{test_cmp}; ++ // constant values ++ let a = $id::splat(0); ++ let b = $id::splat(1); ++ ++ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ ++ // variable values: a = [0, 1, 2, 3]; b = [3, 2, 1, 0] ++ let mut a = $id::splat(0); ++ let mut b = $id::splat(0); ++ for i in 0..$id::lanes() { ++ a = a.replace(i, i as $elem_ty); ++ b = b.replace(i, ($id::lanes() - i) as $elem_ty); ++ } ++ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ ++ // variable values: a = [0, 1, 2, 3]; b = [0, 1, 2, 4] ++ let mut b = a; ++ b = b.replace( ++ $id::lanes() - 1, ++ a.extract($id::lanes() - 1) + 1 as $elem_ty ++ ); ++ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ ++ if $id::lanes() > 2 { ++ // variable values a = [0, 1, 0, 0]; b = [0, 1, 2, 3] ++ let b = a; ++ let mut a = $id::splat(0); ++ a = a.replace(1, 1 as 
$elem_ty); ++ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ ++ // variable values: a = [0, 1, 2, 3]; b = [0, 1, 3, 2] ++ let mut b = a; ++ b = b.replace( ++ 2, a.extract($id::lanes() - 1) + 1 as $elem_ty ++ ); ++ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ } ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! test_cmp_partial_ord_mask { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _cmp_PartialOrd>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn partial_lex_ord() { ++ use crate::testing::utils::{test_cmp}; ++ use crate::cmp::Ordering; ++ ++ // constant values ++ let a = $id::splat(false); ++ let b = $id::splat(true); ++ ++ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ ++ // variable values: ++ // a = [false, false, false, false]; ++ // b = [false, false, false, true] ++ let a = $id::splat(false); ++ let mut b = $id::splat(false); ++ b = b.replace($id::lanes() - 1, true); ++ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ ++ // variable values: ++ // a = [true, true, true, false]; ++ // b = [true, true, true, true] ++ let mut a = $id::splat(true); ++ let b = $id::splat(true); ++ a = a.replace($id::lanes() - 1, false); ++ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ ++ if $id::lanes() > 2 { ++ // variable values ++ // a = [false, true, false, false]; ++ // b = [false, true, true, true] ++ let mut a = $id::splat(false); ++ let mut b = $id::splat(true); ++ a = a.replace(1, true); ++ b = b.replace(0, false); ++ 
test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/cmp/vertical.rs b/third_party/rust/packed_simd/src/api/cmp/vertical.rs +new file mode 100644 +index 000000000000..ea4a0d1a3467 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cmp/vertical.rs +@@ -0,0 +1,114 @@ ++//! Vertical (lane-wise) vector comparisons returning vector masks. ++ ++macro_rules! impl_cmp_vertical { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident, ++ $mask_ty:ident, ++ $is_mask:expr,($true:expr, $false:expr) | $test_tt:tt ++ ) => { ++ impl $id { ++ /// Lane-wise equality comparison. ++ #[inline] ++ pub fn eq(self, other: Self) -> $mask_ty { ++ use crate::llvm::simd_eq; ++ Simd(unsafe { simd_eq(self.0, other.0) }) ++ } ++ ++ /// Lane-wise inequality comparison. ++ #[inline] ++ pub fn ne(self, other: Self) -> $mask_ty { ++ use crate::llvm::simd_ne; ++ Simd(unsafe { simd_ne(self.0, other.0) }) ++ } ++ ++ /// Lane-wise less-than comparison. ++ #[inline] ++ pub fn lt(self, other: Self) -> $mask_ty { ++ use crate::llvm::{simd_gt, simd_lt}; ++ if $is_mask { ++ Simd(unsafe { simd_gt(self.0, other.0) }) ++ } else { ++ Simd(unsafe { simd_lt(self.0, other.0) }) ++ } ++ } ++ ++ /// Lane-wise less-than-or-equals comparison. ++ #[inline] ++ pub fn le(self, other: Self) -> $mask_ty { ++ use crate::llvm::{simd_ge, simd_le}; ++ if $is_mask { ++ Simd(unsafe { simd_ge(self.0, other.0) }) ++ } else { ++ Simd(unsafe { simd_le(self.0, other.0) }) ++ } ++ } ++ ++ /// Lane-wise greater-than comparison. 
++ #[inline] ++ pub fn gt(self, other: Self) -> $mask_ty { ++ use crate::llvm::{simd_gt, simd_lt}; ++ if $is_mask { ++ Simd(unsafe { simd_lt(self.0, other.0) }) ++ } else { ++ Simd(unsafe { simd_gt(self.0, other.0) }) ++ } ++ } ++ ++ /// Lane-wise greater-than-or-equals comparison. ++ #[inline] ++ pub fn ge(self, other: Self) -> $mask_ty { ++ use crate::llvm::{simd_ge, simd_le}; ++ if $is_mask { ++ Simd(unsafe { simd_le(self.0, other.0) }) ++ } else { ++ Simd(unsafe { simd_ge(self.0, other.0) }) ++ } ++ } ++ } ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _cmp_vertical>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn cmp() { ++ let a = $id::splat($false); ++ let b = $id::splat($true); ++ ++ let r = a.lt(b); ++ let e = $mask_ty::splat(true); ++ assert!(r == e); ++ let r = a.le(b); ++ assert!(r == e); ++ ++ let e = $mask_ty::splat(false); ++ let r = a.gt(b); ++ assert!(r == e); ++ let r = a.ge(b); ++ assert!(r == e); ++ let r = a.eq(b); ++ assert!(r == e); ++ ++ let mut a = a; ++ let mut b = b; ++ let mut e = e; ++ for i in 0..$id::lanes() { ++ if i % 2 == 0 { ++ a = a.replace(i, $false); ++ b = b.replace(i, $true); ++ e = e.replace(i, true); ++ } else { ++ a = a.replace(i, $true); ++ b = b.replace(i, $false); ++ e = e.replace(i, false); ++ } ++ } ++ let r = a.lt(b); ++ assert!(r == e); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/default.rs b/third_party/rust/packed_simd/src/api/default.rs +new file mode 100644 +index 000000000000..843d51bcc4bb +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/default.rs +@@ -0,0 +1,28 @@ ++//! Implements `Default` for vector types. ++ ++macro_rules! impl_default { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl Default for $id { ++ #[inline] ++ fn default() -> Self { ++ Self::splat($elem_ty::default()) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _default>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn default() { ++ let a = $id::default(); ++ for i in 0..$id::lanes() { ++ assert_eq!(a.extract(i), $elem_ty::default()); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/fmt.rs b/third_party/rust/packed_simd/src/api/fmt.rs +new file mode 100644 +index 000000000000..f3f55c401548 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/fmt.rs +@@ -0,0 +1,12 @@ ++//! Implements formatting APIs ++ ++#[macro_use] ++mod debug; ++#[macro_use] ++mod lower_hex; ++#[macro_use] ++mod upper_hex; ++#[macro_use] ++mod octal; ++#[macro_use] ++mod binary; +diff --git a/third_party/rust/packed_simd/src/api/fmt/binary.rs b/third_party/rust/packed_simd/src/api/fmt/binary.rs +new file mode 100644 +index 000000000000..b60769082d51 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/fmt/binary.rs +@@ -0,0 +1,56 @@ ++//! Implement Octal formatting ++ ++macro_rules! impl_fmt_binary { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::fmt::Binary for $id { ++ #[allow(clippy::missing_inline_in_public_items)] ++ fn fmt( ++ &self, f: &mut crate::fmt::Formatter<'_>, ++ ) -> crate::fmt::Result { ++ write!(f, "{}(", stringify!($id))?; ++ for i in 0..$elem_count { ++ if i > 0 { ++ write!(f, ", ")?; ++ } ++ self.extract(i).fmt(f)?; ++ } ++ write!(f, ")") ++ } ++ } ++ test_if! { ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _fmt_binary>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn binary() { ++ use arrayvec::{ArrayString,ArrayVec}; ++ type TinyString = ArrayString<[u8; 512]>; ++ ++ use crate::fmt::Write; ++ let v = $id::splat($elem_ty::default()); ++ let mut s = TinyString::new(); ++ write!(&mut s, "{:#b}", v).unwrap(); ++ ++ let mut beg = TinyString::new(); ++ write!(&mut beg, "{}(", stringify!($id)).unwrap(); ++ assert!(s.starts_with(beg.as_str())); ++ assert!(s.ends_with(")")); ++ let s: ArrayVec<[TinyString; 64]> ++ = s.replace(beg.as_str(), "") ++ .replace(")", "").split(",") ++ .map(|v| TinyString::from(v.trim()).unwrap()) ++ .collect(); ++ assert_eq!(s.len(), $id::lanes()); ++ for (index, ss) in s.into_iter().enumerate() { ++ let mut e = TinyString::new(); ++ write!(&mut e, "{:#b}", v.extract(index)).unwrap(); ++ assert_eq!(ss, e); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/fmt/debug.rs b/third_party/rust/packed_simd/src/api/fmt/debug.rs +new file mode 100644 +index 000000000000..ad0b8a59a1f0 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/fmt/debug.rs +@@ -0,0 +1,62 @@ ++//! Implement debug formatting ++ ++macro_rules! impl_fmt_debug_tests { ++ ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ test_if! { ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _fmt_debug>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn debug() { ++ use arrayvec::{ArrayString,ArrayVec}; ++ type TinyString = ArrayString<[u8; 512]>; ++ ++ use crate::fmt::Write; ++ let v = $id::default(); ++ let mut s = TinyString::new(); ++ write!(&mut s, "{:?}", v).unwrap(); ++ ++ let mut beg = TinyString::new(); ++ write!(&mut beg, "{}(", stringify!($id)).unwrap(); ++ assert!(s.starts_with(beg.as_str())); ++ assert!(s.ends_with(")")); ++ let s: ArrayVec<[TinyString; 64]> ++ = s.replace(beg.as_str(), "") ++ .replace(")", "").split(",") ++ .map(|v| TinyString::from(v.trim()).unwrap()) ++ .collect(); ++ assert_eq!(s.len(), $id::lanes()); ++ for (index, ss) in s.into_iter().enumerate() { ++ let mut e = TinyString::new(); ++ write!(&mut e, "{:?}", v.extract(index)).unwrap(); ++ assert_eq!(ss, e); ++ } ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! impl_fmt_debug { ++ ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::fmt::Debug for $id { ++ #[allow(clippy::missing_inline_in_public_items)] ++ fn fmt( ++ &self, f: &mut crate::fmt::Formatter<'_>, ++ ) -> crate::fmt::Result { ++ write!(f, "{}(", stringify!($id))?; ++ for i in 0..$elem_count { ++ if i > 0 { ++ write!(f, ", ")?; ++ } ++ self.extract(i).fmt(f)?; ++ } ++ write!(f, ")") ++ } ++ } ++ impl_fmt_debug_tests!([$elem_ty; $elem_count]: $id | $test_tt); ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs +new file mode 100644 +index 000000000000..5a7aa14b5b8a +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs +@@ -0,0 +1,56 @@ ++//! Implement `LowerHex` formatting ++ ++macro_rules! 
impl_fmt_lower_hex { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::fmt::LowerHex for $id { ++ #[allow(clippy::missing_inline_in_public_items)] ++ fn fmt( ++ &self, f: &mut crate::fmt::Formatter<'_>, ++ ) -> crate::fmt::Result { ++ write!(f, "{}(", stringify!($id))?; ++ for i in 0..$elem_count { ++ if i > 0 { ++ write!(f, ", ")?; ++ } ++ self.extract(i).fmt(f)?; ++ } ++ write!(f, ")") ++ } ++ } ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _fmt_lower_hex>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn lower_hex() { ++ use arrayvec::{ArrayString,ArrayVec}; ++ type TinyString = ArrayString<[u8; 512]>; ++ ++ use crate::fmt::Write; ++ let v = $id::splat($elem_ty::default()); ++ let mut s = TinyString::new(); ++ write!(&mut s, "{:#x}", v).unwrap(); ++ ++ let mut beg = TinyString::new(); ++ write!(&mut beg, "{}(", stringify!($id)).unwrap(); ++ assert!(s.starts_with(beg.as_str())); ++ assert!(s.ends_with(")")); ++ let s: ArrayVec<[TinyString; 64]> ++ = s.replace(beg.as_str(), "").replace(")", "") ++ .split(",") ++ .map(|v| TinyString::from(v.trim()).unwrap()) ++ .collect(); ++ assert_eq!(s.len(), $id::lanes()); ++ for (index, ss) in s.into_iter().enumerate() { ++ let mut e = TinyString::new(); ++ write!(&mut e, "{:#x}", v.extract(index)).unwrap(); ++ assert_eq!(ss, e); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/fmt/octal.rs b/third_party/rust/packed_simd/src/api/fmt/octal.rs +new file mode 100644 +index 000000000000..83ac8abc7dae +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/fmt/octal.rs +@@ -0,0 +1,56 @@ ++//! Implement Octal formatting ++ ++macro_rules! 
impl_fmt_octal { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::fmt::Octal for $id { ++ #[allow(clippy::missing_inline_in_public_items)] ++ fn fmt( ++ &self, f: &mut crate::fmt::Formatter<'_>, ++ ) -> crate::fmt::Result { ++ write!(f, "{}(", stringify!($id))?; ++ for i in 0..$elem_count { ++ if i > 0 { ++ write!(f, ", ")?; ++ } ++ self.extract(i).fmt(f)?; ++ } ++ write!(f, ")") ++ } ++ } ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _fmt_octal>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn octal_hex() { ++ use arrayvec::{ArrayString,ArrayVec}; ++ type TinyString = ArrayString<[u8; 512]>; ++ ++ use crate::fmt::Write; ++ let v = $id::splat($elem_ty::default()); ++ let mut s = TinyString::new(); ++ write!(&mut s, "{:#o}", v).unwrap(); ++ ++ let mut beg = TinyString::new(); ++ write!(&mut beg, "{}(", stringify!($id)).unwrap(); ++ assert!(s.starts_with(beg.as_str())); ++ assert!(s.ends_with(")")); ++ let s: ArrayVec<[TinyString; 64]> ++ = s.replace(beg.as_str(), "").replace(")", "") ++ .split(",") ++ .map(|v| TinyString::from(v.trim()).unwrap()) ++ .collect(); ++ assert_eq!(s.len(), $id::lanes()); ++ for (index, ss) in s.into_iter().enumerate() { ++ let mut e = TinyString::new(); ++ write!(&mut e, "{:#o}", v.extract(index)).unwrap(); ++ assert_eq!(ss, e); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs +new file mode 100644 +index 000000000000..aa88f673abf0 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs +@@ -0,0 +1,56 @@ ++//! Implement `UpperHex` formatting ++ ++macro_rules! 
impl_fmt_upper_hex { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::fmt::UpperHex for $id { ++ #[allow(clippy::missing_inline_in_public_items)] ++ fn fmt( ++ &self, f: &mut crate::fmt::Formatter<'_>, ++ ) -> crate::fmt::Result { ++ write!(f, "{}(", stringify!($id))?; ++ for i in 0..$elem_count { ++ if i > 0 { ++ write!(f, ", ")?; ++ } ++ self.extract(i).fmt(f)?; ++ } ++ write!(f, ")") ++ } ++ } ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _fmt_upper_hex>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn upper_hex() { ++ use arrayvec::{ArrayString,ArrayVec}; ++ type TinyString = ArrayString<[u8; 512]>; ++ ++ use crate::fmt::Write; ++ let v = $id::splat($elem_ty::default()); ++ let mut s = TinyString::new(); ++ write!(&mut s, "{:#X}", v).unwrap(); ++ ++ let mut beg = TinyString::new(); ++ write!(&mut beg, "{}(", stringify!($id)).unwrap(); ++ assert!(s.starts_with(beg.as_str())); ++ assert!(s.ends_with(")")); ++ let s: ArrayVec<[TinyString; 64]> ++ = s.replace(beg.as_str(), "").replace(")", "") ++ .split(",") ++ .map(|v| TinyString::from(v.trim()).unwrap()) ++ .collect(); ++ assert_eq!(s.len(), $id::lanes()); ++ for (index, ss) in s.into_iter().enumerate() { ++ let mut e = TinyString::new(); ++ write!(&mut e, "{:#X}", v.extract(index)).unwrap(); ++ assert_eq!(ss, e); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/from.rs b/third_party/rust/packed_simd/src/api/from.rs +new file mode 100644 +index 000000000000..c30c4d6e216d +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/from.rs +@@ -0,0 +1,7 @@ ++//! 
Implementations of the `From` and `Into` traits ++ ++#[macro_use] ++mod from_array; ++ ++#[macro_use] ++mod from_vector; +diff --git a/third_party/rust/packed_simd/src/api/from/from_array.rs b/third_party/rust/packed_simd/src/api/from/from_array.rs +new file mode 100644 +index 000000000000..964d1501df6a +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/from/from_array.rs +@@ -0,0 +1,121 @@ ++//! Implements `From<[T; N]>` and `Into<[T; N]>` for vector types. ++ ++macro_rules! impl_from_array { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt ++ | ($non_default_array:expr, $non_default_vec:expr)) => { ++ impl From<[$elem_ty; $elem_count]> for $id { ++ #[inline] ++ fn from(array: [$elem_ty; $elem_count]) -> Self { ++ union U { ++ array: [$elem_ty; $elem_count], ++ vec: $id, ++ } ++ unsafe { U { array }.vec } ++ } ++ } ++ ++ impl From<$id> for [$elem_ty; $elem_count] { ++ #[inline] ++ fn from(vec: $id) -> Self { ++ union U { ++ array: [$elem_ty; $elem_count], ++ vec: $id, ++ } ++ unsafe { U { vec }.array } ++ } ++ } ++ ++ // FIXME: `Into::into` is not inline, but due to ++ // the blanket impl in `std`, which is not ++ // marked `default`, we cannot override it here with ++ // specialization. ++ /* ++ impl Into<[$elem_ty; $elem_count]> for $id { ++ #[inline] ++ fn into(self) -> [$elem_ty; $elem_count] { ++ union U { ++ array: [$elem_ty; $elem_count], ++ vec: $id, ++ } ++ unsafe { U { vec: self }.array } ++ } ++ } ++ ++ impl Into<$id> for [$elem_ty; $elem_count] { ++ #[inline] ++ fn into(self) -> $id { ++ union U { ++ array: [$elem_ty; $elem_count], ++ vec: $id, ++ } ++ unsafe { U { array: self }.vec } ++ } ++ } ++ */ ++ ++ test_if! { ++ $test_tt: ++ paste::item! { ++ mod [<$id _from>] { ++ use super::*; ++ #[test] ++ fn array() { ++ let vec: $id = Default::default(); ++ ++ // FIXME: Workaround for arrays with more than 32 ++ // elements. ++ // ++ // Safe because we never take a reference to any ++ // uninitialized element. 
++ union W { ++ array: [$elem_ty; $elem_count], ++ other: () ++ } ++ let mut array = W { other: () }; ++ for i in 0..$elem_count { ++ let default: $elem_ty = Default::default(); ++ // note: array.other is the active member and ++ // initialized so we can take a reference to it: ++ let p = unsafe { ++ &mut array.other as *mut () as *mut $elem_ty ++ }; ++ // note: default is a valid bit-pattern for ++ // $elem_ty: ++ unsafe { ++ crate::ptr::write(p.wrapping_add(i), default) ++ }; ++ } ++ // note: the array variant of the union is properly ++ // initialized: ++ let mut array = unsafe { ++ array.array ++ }; ++ ++ array[0] = $non_default_array; ++ let vec = vec.replace(0, $non_default_vec); ++ ++ let vec_from_array = $id::from(array); ++ assert_eq!(vec_from_array, vec); ++ let array_from_vec ++ = <[$elem_ty; $elem_count]>::from(vec); ++ // FIXME: Workaround for arrays with more than 32 ++ // elements. ++ for i in 0..$elem_count { ++ assert_eq!(array_from_vec[i], array[i]); ++ } ++ ++ let vec_from_into_array: $id = array.into(); ++ assert_eq!(vec_from_into_array, vec); ++ let array_from_into_vec: [$elem_ty; $elem_count] ++ = vec.into(); ++ // FIXME: Workaround for arrays with more than 32 ++ // elements. ++ for i in 0..$elem_count { ++ assert_eq!(array_from_into_vec[i], array[i]); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/from/from_vector.rs b/third_party/rust/packed_simd/src/api/from/from_vector.rs +new file mode 100644 +index 000000000000..55f70016d51d +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/from/from_vector.rs +@@ -0,0 +1,67 @@ ++//! Implements `From` and `Into` for vector types. ++ ++macro_rules! 
impl_from_vector { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt ++ | $source:ident) => { ++ impl From<$source> for $id { ++ #[inline] ++ fn from(source: $source) -> Self { ++ fn static_assert_same_number_of_lanes() ++ where ++ T: crate::sealed::Simd, ++ U: crate::sealed::Simd, ++ { ++ } ++ use crate::llvm::simd_cast; ++ static_assert_same_number_of_lanes::<$id, $source>(); ++ Simd(unsafe { simd_cast(source.0) }) ++ } ++ } ++ ++ // FIXME: `Into::into` is not inline, but due to the blanket impl in ++ // `std`, which is not marked `default`, we cannot override it here ++ // with specialization. ++ ++ /* ++ impl Into<$id> for $source { ++ #[inline] ++ fn into(self) -> $id { ++ unsafe { simd_cast(self) } ++ } ++ } ++ */ ++ ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _from_ $source>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn from() { ++ assert_eq!($id::lanes(), $source::lanes()); ++ let source: $source = Default::default(); ++ let vec: $id = Default::default(); ++ ++ let e = $id::from(source); ++ assert_eq!(e, vec); ++ ++ let e: $id = source.into(); ++ assert_eq!(e, vec); ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! impl_from_vectors { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt ++ | $($source:ident),*) => { ++ $( ++ impl_from_vector!( ++ [$elem_ty; $elem_count]: $id | $test_tt | $source ++ ); ++ )* ++ } ++} +diff --git a/third_party/rust/packed_simd/src/api/hash.rs b/third_party/rust/packed_simd/src/api/hash.rs +new file mode 100644 +index 000000000000..08d42496ea8b +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/hash.rs +@@ -0,0 +1,47 @@ ++//! Implements `Hash` for vector types. ++ ++macro_rules! 
impl_hash { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::hash::Hash for $id { ++ #[inline] ++ fn hash(&self, state: &mut H) { ++ unsafe { ++ union A { ++ data: [$elem_ty; $id::lanes()], ++ vec: $id, ++ } ++ A { vec: *self }.data.hash(state) ++ } ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _hash>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn hash() { ++ use crate::hash::{Hash, Hasher}; ++ #[allow(deprecated)] ++ use crate::hash::{SipHasher13}; ++ type A = [$elem_ty; $id::lanes()]; ++ let a: A = [42 as $elem_ty; $id::lanes()]; ++ assert_eq!( ++ crate::mem::size_of::(), ++ crate::mem::size_of::<$id>() ++ ); ++ #[allow(deprecated)] ++ let mut a_hash = SipHasher13::new(); ++ let mut v_hash = a_hash.clone(); ++ a.hash(&mut a_hash); ++ ++ let v = $id::splat(42 as $elem_ty); ++ v.hash(&mut v_hash); ++ assert_eq!(a_hash.finish(), v_hash.finish()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/into_bits.rs b/third_party/rust/packed_simd/src/api/into_bits.rs +new file mode 100644 +index 000000000000..f2cc1bae5397 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits.rs +@@ -0,0 +1,59 @@ ++//! Implementation of `FromBits` and `IntoBits`. ++ ++/// Safe lossless bitwise conversion from `T` to `Self`. ++pub trait FromBits: crate::marker::Sized { ++ /// Safe lossless bitwise transmute from `T` to `Self`. ++ fn from_bits(t: T) -> Self; ++} ++ ++/// Safe lossless bitwise conversion from `Self` to `T`. ++pub trait IntoBits: crate::marker::Sized { ++ /// Safe lossless bitwise transmute from `self` to `T`. ++ fn into_bits(self) -> T; ++} ++ ++/// `FromBits` implies `IntoBits`. 
++impl IntoBits for T ++where ++ U: FromBits, ++{ ++ #[inline] ++ fn into_bits(self) -> U { ++ debug_assert!( ++ crate::mem::size_of::() == crate::mem::size_of::() ++ ); ++ U::from_bits(self) ++ } ++} ++ ++/// `FromBits` and `IntoBits` are reflexive ++impl FromBits for T { ++ #[inline] ++ fn from_bits(t: Self) -> Self { ++ t ++ } ++} ++ ++#[macro_use] ++mod macros; ++ ++mod v16; ++pub use self::v16::*; ++ ++mod v32; ++pub use self::v32::*; ++ ++mod v64; ++pub use self::v64::*; ++ ++mod v128; ++pub use self::v128::*; ++ ++mod v256; ++pub use self::v256::*; ++ ++mod v512; ++pub use self::v512::*; ++ ++mod arch_specific; ++pub use self::arch_specific::*; +diff --git a/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs +new file mode 100644 +index 000000000000..6cc2fa37b728 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs +@@ -0,0 +1,190 @@ ++//! `FromBits` and `IntoBits` between portable vector types and the ++//! architecture-specific vector types. ++#![rustfmt::skip] ++ ++// FIXME: MIPS FromBits/IntoBits ++ ++#[allow(unused)] ++use crate::*; ++ ++/// This macro implements FromBits for the portable and the architecture ++/// specific vector types. ++/// ++/// The "leaf" case is at the bottom, and the most generic case is at the top. ++/// The generic case is split into smaller cases recursively. ++macro_rules! 
impl_arch { ++ ([$arch_head_i:ident[$arch_head_tt:tt]: $($arch_head_ty:ident),*], ++ $([$arch_tail_i:ident[$arch_tail_tt:tt]: $($arch_tail_ty:ident),*]),* | ++ from: $($from_ty:ident),* | into: $($into_ty:ident),* | ++ test: $test_tt:tt) => { ++ impl_arch!( ++ [$arch_head_i[$arch_head_tt]: $($arch_head_ty),*] | ++ from: $($from_ty),* | ++ into: $($into_ty),* | ++ test: $test_tt ++ ); ++ impl_arch!( ++ $([$arch_tail_i[$arch_tail_tt]: $($arch_tail_ty),*]),* | ++ from: $($from_ty),* | ++ into: $($into_ty),* | ++ test: $test_tt ++ ); ++ }; ++ ([$arch:ident[$arch_tt:tt]: $($arch_ty:ident),*] | ++ from: $($from_ty:ident),* | into: $($into_ty:ident),* | ++ test: $test_tt:tt) => { ++ // note: if target is "arm", "+v7,+neon" must be enabled ++ // and the std library must be recompiled with them ++ #[cfg(any( ++ not(target_arch = "arm"), ++ all(target_feature = "v7", target_feature = "neon", ++ any(feature = "core_arch", libcore_neon))) ++ )] ++ // note: if target is "powerpc", "altivec" must be enabled ++ // and the std library must be recompiled with it ++ #[cfg(any( ++ not(target_arch = "powerpc"), ++ all(target_feature = "altivec", feature = "core_arch"), ++ ))] ++ #[cfg(target_arch = $arch_tt)] ++ use crate::arch::$arch::{ ++ $($arch_ty),* ++ }; ++ ++ #[cfg(any( ++ not(target_arch = "arm"), ++ all(target_feature = "v7", target_feature = "neon", ++ any(feature = "core_arch", libcore_neon))) ++ )] ++ #[cfg(any( ++ not(target_arch = "powerpc"), ++ all(target_feature = "altivec", feature = "core_arch"), ++ ))] ++ #[cfg(target_arch = $arch_tt)] ++ impl_arch!($($arch_ty),* | $($from_ty),* | $($into_ty),* | ++ test: $test_tt); ++ }; ++ ($arch_head:ident, $($arch_tail:ident),* | $($from_ty:ident),* ++ | $($into_ty:ident),* | test: $test_tt:tt) => { ++ impl_arch!($arch_head | $($from_ty),* | $($into_ty),* | ++ test: $test_tt); ++ impl_arch!($($arch_tail),* | $($from_ty),* | $($into_ty),* | ++ test: $test_tt); ++ }; ++ ($arch_head:ident | $($from_ty:ident),* | $($into_ty:ident),* 
| ++ test: $test_tt:tt) => { ++ impl_from_bits!($arch_head[$test_tt]: $($from_ty),*); ++ impl_into_bits!($arch_head[$test_tt]: $($into_ty),*); ++ }; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// Implementations for the 64-bit wide vector types: ++ ++// FIXME: 64-bit single element types ++// FIXME: arm/aarch float16x4_t missing ++impl_arch!( ++ [x86["x86"]: __m64], [x86_64["x86_64"]: __m64], ++ [arm["arm"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t, ++ poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, ++ uint64x1_t], ++ [aarch64["aarch64"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t, ++ poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, uint64x1_t, ++ float64x1_t] | ++ from: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2 | ++ into: i8x8, u8x8, i16x4, u16x4, i32x2, u32x2, f32x2 | ++ test: test_v64 ++); ++ ++//////////////////////////////////////////////////////////////////////////////// ++// Implementations for the 128-bit wide vector types: ++ ++// FIXME: arm/aarch float16x8_t missing ++// FIXME: ppc vector_pixel missing ++// FIXME: ppc64 vector_Float16 missing ++// FIXME: ppc64 vector_signed_long_long missing ++// FIXME: ppc64 vector_unsigned_long_long missing ++// FIXME: ppc64 vector_bool_long_long missing ++// FIXME: ppc64 vector_signed___int128 missing ++// FIXME: ppc64 vector_unsigned___int128 missing ++impl_arch!( ++ [x86["x86"]: __m128, __m128i, __m128d], ++ [x86_64["x86_64"]: __m128, __m128i, __m128d], ++ [arm["arm"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, uint16x8_t, ++ poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, uint64x2_t], ++ [aarch64["aarch64"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, ++ uint16x8_t, poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, ++ uint64x2_t, float64x2_t], ++ [powerpc["powerpc"]: vector_signed_char, vector_unsigned_char, ++ vector_signed_short, vector_unsigned_short, vector_signed_int, ++ 
vector_unsigned_int, vector_float], ++ [powerpc64["powerpc64"]: vector_signed_char, vector_unsigned_char, ++ vector_signed_short, vector_unsigned_short, vector_signed_int, ++ vector_unsigned_int, vector_float, vector_signed_long, ++ vector_unsigned_long, vector_double] | ++ from: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, ++ i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1 | ++ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, i64x2, u64x2, f64x2, ++ i128x1, u128x1 | ++ test: test_v128 ++); ++ ++impl_arch!( ++ [powerpc["powerpc"]: vector_bool_char], ++ [powerpc64["powerpc64"]: vector_bool_char] | ++ from: m8x16, m16x8, m32x4, m64x2, m128x1 | ++ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, ++ i64x2, u64x2, f64x2, i128x1, u128x1, ++ // Masks: ++ m8x16 | ++ test: test_v128 ++); ++ ++impl_arch!( ++ [powerpc["powerpc"]: vector_bool_short], ++ [powerpc64["powerpc64"]: vector_bool_short] | ++ from: m16x8, m32x4, m64x2, m128x1 | ++ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, ++ i64x2, u64x2, f64x2, i128x1, u128x1, ++ // Masks: ++ m8x16, m16x8 | ++ test: test_v128 ++); ++ ++impl_arch!( ++ [powerpc["powerpc"]: vector_bool_int], ++ [powerpc64["powerpc64"]: vector_bool_int] | ++ from: m32x4, m64x2, m128x1 | ++ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, ++ i64x2, u64x2, f64x2, i128x1, u128x1, ++ // Masks: ++ m8x16, m16x8, m32x4 | ++ test: test_v128 ++); ++ ++impl_arch!( ++ [powerpc64["powerpc64"]: vector_bool_long] | ++ from: m64x2, m128x1 | ++ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, ++ i64x2, u64x2, f64x2, i128x1, u128x1, ++ // Masks: ++ m8x16, m16x8, m32x4, m64x2 | ++ test: test_v128 ++); ++ ++//////////////////////////////////////////////////////////////////////////////// ++// Implementations for the 256-bit wide vector types ++ ++impl_arch!( ++ [x86["x86"]: __m256, __m256i, __m256d], ++ [x86_64["x86_64"]: __m256, __m256i, __m256d] | ++ from: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, ++ 
i32x8, u32x8, f32x8, m32x8, ++ i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2 | ++ into: i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, f32x8, ++ i64x4, u64x4, f64x4, i128x2, u128x2 | ++ test: test_v256 ++); ++ ++//////////////////////////////////////////////////////////////////////////////// ++// FIXME: Implementations for the 512-bit wide vector types +diff --git a/third_party/rust/packed_simd/src/api/into_bits/macros.rs b/third_party/rust/packed_simd/src/api/into_bits/macros.rs +new file mode 100644 +index 000000000000..8cec5b00479f +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/macros.rs +@@ -0,0 +1,74 @@ ++//! Macros implementing `FromBits` ++ ++macro_rules! impl_from_bits_ { ++ ($id:ident[$test_tt:tt]: $from_ty:ident) => { ++ impl crate::api::into_bits::FromBits<$from_ty> for $id { ++ #[inline] ++ fn from_bits(x: $from_ty) -> Self { ++ unsafe { crate::mem::transmute(x) } ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _from_bits_ $from_ty>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn test() { ++ use crate::{ ++ ptr::{read_unaligned}, ++ mem::{size_of, zeroed} ++ }; ++ use crate::IntoBits; ++ assert_eq!(size_of::<$id>(), ++ size_of::<$from_ty>()); ++ // This is safe becasue we never create a reference to ++ // uninitialized memory: ++ let a: $from_ty = unsafe { zeroed() }; ++ ++ let b_0: $id = crate::FromBits::from_bits(a); ++ let b_1: $id = a.into_bits(); ++ ++ // Check that these are byte-wise equal, that is, ++ // that the bit patterns are identical: ++ for i in 0..size_of::<$id>() { ++ // This is safe because we only read initialized ++ // memory in bounds. Also, taking a reference to ++ // `b_i` is ok because the fields are initialized. 
++ unsafe { ++ let b_0_v: u8 = read_unaligned( ++ (&b_0 as *const $id as *const u8) ++ .wrapping_add(i) ++ ); ++ let b_1_v: u8 = read_unaligned( ++ (&b_1 as *const $id as *const u8) ++ .wrapping_add(i) ++ ); ++ assert_eq!(b_0_v, b_1_v); ++ } ++ } ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! impl_from_bits { ++ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { ++ $( ++ impl_from_bits_!($id[$test_tt]: $from_ty); ++ )* ++ } ++} ++ ++#[allow(unused)] ++macro_rules! impl_into_bits { ++ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { ++ $( ++ impl_from_bits_!($from_ty[$test_tt]: $id); ++ )* ++ } ++} +diff --git a/third_party/rust/packed_simd/src/api/into_bits/v128.rs b/third_party/rust/packed_simd/src/api/into_bits/v128.rs +new file mode 100644 +index 000000000000..804dbf282d53 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/v128.rs +@@ -0,0 +1,28 @@ ++//! `FromBits` and `IntoBits` implementations for portable 128-bit wide vectors ++#![rustfmt::skip] ++ ++#[allow(unused)] // wasm_bindgen_test ++use crate::*; ++ ++impl_from_bits!(i8x16[test_v128]: u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); ++impl_from_bits!(u8x16[test_v128]: i8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); ++impl_from_bits!(m8x16[test_v128]: m16x8, m32x4, m64x2, m128x1); ++ ++impl_from_bits!(i16x8[test_v128]: i8x16, u8x16, m8x16, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); ++impl_from_bits!(u16x8[test_v128]: i8x16, u8x16, m8x16, i16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); ++impl_from_bits!(m16x8[test_v128]: m32x4, m64x2, m128x1); ++ ++impl_from_bits!(i32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); ++impl_from_bits!(u32x4[test_v128]: i8x16, u8x16, m8x16, 
i16x8, u16x8, m16x8, i32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); ++impl_from_bits!(f32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); ++impl_from_bits!(m32x4[test_v128]: m64x2, m128x1); ++ ++impl_from_bits!(i64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); ++impl_from_bits!(u64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, f64x2, m64x2, i128x1, u128x1, m128x1); ++impl_from_bits!(f64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, m64x2, i128x1, u128x1, m128x1); ++impl_from_bits!(m64x2[test_v128]: m128x1); ++ ++impl_from_bits!(i128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, u128x1, m128x1); ++impl_from_bits!(u128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, m128x1); ++// note: m128x1 cannot be constructed from all the other masks bit patterns in here ++ +diff --git a/third_party/rust/packed_simd/src/api/into_bits/v16.rs b/third_party/rust/packed_simd/src/api/into_bits/v16.rs +new file mode 100644 +index 000000000000..1162a62e5bd1 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/v16.rs +@@ -0,0 +1,9 @@ ++//! 
`FromBits` and `IntoBits` implementations for portable 16-bit wide vectors ++#![rustfmt::skip] ++ ++#[allow(unused)] // wasm_bindgen_test ++use crate::*; ++ ++impl_from_bits!(i8x2[test_v16]: u8x2, m8x2); ++impl_from_bits!(u8x2[test_v16]: i8x2, m8x2); ++// note: m8x2 cannot be constructed from all i8x2 or u8x2 bit patterns +diff --git a/third_party/rust/packed_simd/src/api/into_bits/v256.rs b/third_party/rust/packed_simd/src/api/into_bits/v256.rs +new file mode 100644 +index 000000000000..cc7a6646b535 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/v256.rs +@@ -0,0 +1,27 @@ ++//! `FromBits` and `IntoBits` implementations for portable 256-bit wide vectors ++#![rustfmt::skip] ++ ++#[allow(unused)] // wasm_bindgen_test ++use crate::*; ++ ++impl_from_bits!(i8x32[test_v256]: u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(u8x32[test_v256]: i8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(m8x32[test_v256]: m16x16, m32x8, m64x4, m128x2); ++ ++impl_from_bits!(i16x16[test_v256]: i8x32, u8x32, m8x32, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(u16x16[test_v256]: i8x32, u8x32, m8x32, i16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(m16x16[test_v256]: m32x8, m64x4, m128x2); ++ ++impl_from_bits!(i32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(u32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(f32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); 
++impl_from_bits!(m32x8[test_v256]: m64x4, m128x2); ++ ++impl_from_bits!(i64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(u64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(f64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(m64x4[test_v256]: m128x2); ++ ++impl_from_bits!(i128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, u128x2, m128x2); ++impl_from_bits!(u128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, m128x2); ++// note: m128x2 cannot be constructed from all the other masks bit patterns in here +diff --git a/third_party/rust/packed_simd/src/api/into_bits/v32.rs b/third_party/rust/packed_simd/src/api/into_bits/v32.rs +new file mode 100644 +index 000000000000..2c183ecf1c77 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/v32.rs +@@ -0,0 +1,13 @@ ++//! 
`FromBits` and `IntoBits` implementations for portable 32-bit wide vectors ++#![rustfmt::skip] ++ ++#[allow(unused)] // wasm_bindgen_test ++use crate::*; ++ ++impl_from_bits!(i8x4[test_v32]: u8x4, m8x4, i16x2, u16x2, m16x2); ++impl_from_bits!(u8x4[test_v32]: i8x4, m8x4, i16x2, u16x2, m16x2); ++impl_from_bits!(m8x4[test_v32]: m16x2); ++ ++impl_from_bits!(i16x2[test_v32]: i8x4, u8x4, m8x4, u16x2, m16x2); ++impl_from_bits!(u16x2[test_v32]: i8x4, u8x4, m8x4, i16x2, m16x2); ++// note: m16x2 cannot be constructed from all m8x4 bit patterns +diff --git a/third_party/rust/packed_simd/src/api/into_bits/v512.rs b/third_party/rust/packed_simd/src/api/into_bits/v512.rs +new file mode 100644 +index 000000000000..8dec6a7f63a0 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/v512.rs +@@ -0,0 +1,27 @@ ++//! `FromBits` and `IntoBits` implementations for portable 512-bit wide vectors ++#![rustfmt::skip] ++ ++#[allow(unused)] // wasm_bindgen_test ++use crate::*; ++ ++impl_from_bits!(i8x64[test_v512]: u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(u8x64[test_v512]: i8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(m8x64[test_v512]: m16x32, m32x16, m64x8, m128x4); ++ ++impl_from_bits!(i16x32[test_v512]: i8x64, u8x64, m8x64, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(u16x32[test_v512]: i8x64, u8x64, m8x64, i16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(m16x32[test_v512]: m32x16, m64x8, m128x4); ++ ++impl_from_bits!(i32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(u32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, 
i32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(f32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(m32x16[test_v512]: m64x8, m128x4); ++ ++impl_from_bits!(i64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(u64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(f64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(m64x8[test_v512]: m128x4); ++ ++impl_from_bits!(i128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, u128x4, m128x4); ++impl_from_bits!(u128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, m128x4); ++// note: m128x4 cannot be constructed from all the other masks bit patterns in here +diff --git a/third_party/rust/packed_simd/src/api/into_bits/v64.rs b/third_party/rust/packed_simd/src/api/into_bits/v64.rs +new file mode 100644 +index 000000000000..8999d98e13f8 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/v64.rs +@@ -0,0 +1,18 @@ ++//! 
`FromBits` and `IntoBits` implementations for portable 64-bit wide vectors ++#![rustfmt::skip] ++ ++#[allow(unused)] // wasm_bindgen_test ++use crate::*; ++ ++impl_from_bits!(i8x8[test_v64]: u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); ++impl_from_bits!(u8x8[test_v64]: i8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); ++impl_from_bits!(m8x8[test_v64]: m16x4, m32x2); ++ ++impl_from_bits!(i16x4[test_v64]: i8x8, u8x8, m8x8, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); ++impl_from_bits!(u16x4[test_v64]: i8x8, u8x8, m8x8, i16x4, m16x4, i32x2, u32x2, f32x2, m32x2); ++impl_from_bits!(m16x4[test_v64]: m32x2); ++ ++impl_from_bits!(i32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, u32x2, f32x2, m32x2); ++impl_from_bits!(u32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, f32x2, m32x2); ++impl_from_bits!(f32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, m32x2); ++// note: m32x2 cannot be constructed from all m16x4 or m8x8 bit patterns +diff --git a/third_party/rust/packed_simd/src/api/math.rs b/third_party/rust/packed_simd/src/api/math.rs +new file mode 100644 +index 000000000000..e7a8d256baf5 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math.rs +@@ -0,0 +1,4 @@ ++//! Implements vertical math operations ++ ++#[macro_use] ++mod float; +diff --git a/third_party/rust/packed_simd/src/api/math/float.rs b/third_party/rust/packed_simd/src/api/math/float.rs +new file mode 100644 +index 000000000000..c0ec46e91789 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float.rs +@@ -0,0 +1,61 @@ ++//! Implements vertical floating-point math operations. 
++ ++#[macro_use] ++mod abs; ++ ++#[macro_use] ++mod consts; ++ ++#[macro_use] ++mod cos; ++ ++#[macro_use] ++mod exp; ++ ++#[macro_use] ++mod powf; ++ ++#[macro_use] ++mod ln; ++ ++#[macro_use] ++mod mul_add; ++ ++#[macro_use] ++mod mul_adde; ++ ++#[macro_use] ++mod recpre; ++ ++#[macro_use] ++mod rsqrte; ++ ++#[macro_use] ++mod sin; ++ ++#[macro_use] ++mod sqrt; ++ ++#[macro_use] ++mod sqrte; ++ ++macro_rules! impl_float_category { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident, $mask_ty:ident) => { ++ impl $id { ++ #[inline] ++ pub fn is_nan(self) -> $mask_ty { ++ self.ne(self) ++ } ++ ++ #[inline] ++ pub fn is_infinite(self) -> $mask_ty { ++ self.eq(Self::INFINITY) | self.eq(Self::NEG_INFINITY) ++ } ++ ++ #[inline] ++ pub fn is_finite(self) -> $mask_ty { ++ !(self.is_nan() | self.is_infinite()) ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/abs.rs b/third_party/rust/packed_simd/src/api/math/float/abs.rs +new file mode 100644 +index 000000000000..1865bdb68ec6 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/abs.rs +@@ -0,0 +1,31 @@ ++//! Implements vertical (lane-wise) floating-point `abs`. ++ ++macro_rules! impl_math_float_abs { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Absolute value. ++ #[inline] ++ pub fn abs(self) -> Self { ++ use crate::codegen::math::float::abs::Abs; ++ Abs::abs(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _math_abs>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn abs() { ++ let o = $id::splat(1 as $elem_ty); ++ assert_eq!(o, o.abs()); ++ ++ let mo = $id::splat(-1 as $elem_ty); ++ assert_eq!(o, mo.abs()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/consts.rs b/third_party/rust/packed_simd/src/api/math/float/consts.rs +new file mode 100644 +index 000000000000..89f93a6d692b +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/consts.rs +@@ -0,0 +1,86 @@ ++macro_rules! impl_float_consts { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident) => { ++ impl $id { ++ /// Machine epsilon value. ++ pub const EPSILON: $id = $id::splat(core::$elem_ty::EPSILON); ++ ++ /// Smallest finite value. ++ pub const MIN: $id = $id::splat(core::$elem_ty::MIN); ++ ++ /// Smallest positive normal value. ++ pub const MIN_POSITIVE: $id = ++ $id::splat(core::$elem_ty::MIN_POSITIVE); ++ ++ /// Largest finite value. ++ pub const MAX: $id = $id::splat(core::$elem_ty::MAX); ++ ++ /// Not a Number (NaN). ++ pub const NAN: $id = $id::splat(core::$elem_ty::NAN); ++ ++ /// Infinity (∞). ++ pub const INFINITY: $id = $id::splat(core::$elem_ty::INFINITY); ++ ++ /// Negative infinity (-∞). 
++ pub const NEG_INFINITY: $id = ++ $id::splat(core::$elem_ty::NEG_INFINITY); ++ ++ /// Archimedes' constant (π) ++ pub const PI: $id = $id::splat(core::$elem_ty::consts::PI); ++ ++ /// π/2 ++ pub const FRAC_PI_2: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_PI_2); ++ ++ /// π/3 ++ pub const FRAC_PI_3: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_PI_3); ++ ++ /// π/4 ++ pub const FRAC_PI_4: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_PI_4); ++ ++ /// π/6 ++ pub const FRAC_PI_6: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_PI_6); ++ ++ /// π/8 ++ pub const FRAC_PI_8: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_PI_8); ++ ++ /// 1/π ++ pub const FRAC_1_PI: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_1_PI); ++ ++ /// 2/π ++ pub const FRAC_2_PI: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_2_PI); ++ ++ /// 2/sqrt(π) ++ pub const FRAC_2_SQRT_PI: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_2_SQRT_PI); ++ ++ /// sqrt(2) ++ pub const SQRT_2: $id = $id::splat(core::$elem_ty::consts::SQRT_2); ++ ++ /// 1/sqrt(2) ++ pub const FRAC_1_SQRT_2: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_1_SQRT_2); ++ ++ /// Euler's number (e) ++ pub const E: $id = $id::splat(core::$elem_ty::consts::E); ++ ++ /// log2(e) ++ pub const LOG2_E: $id = $id::splat(core::$elem_ty::consts::LOG2_E); ++ ++ /// log10(e) ++ pub const LOG10_E: $id = ++ $id::splat(core::$elem_ty::consts::LOG10_E); ++ ++ /// ln(2) ++ pub const LN_2: $id = $id::splat(core::$elem_ty::consts::LN_2); ++ ++ /// ln(10) ++ pub const LN_10: $id = $id::splat(core::$elem_ty::consts::LN_10); ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/cos.rs b/third_party/rust/packed_simd/src/api/math/float/cos.rs +new file mode 100644 +index 000000000000..e5b8f46036c7 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/cos.rs +@@ -0,0 +1,44 @@ ++//! Implements vertical (lane-wise) floating-point `cos`. ++ ++macro_rules! 
impl_math_float_cos { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Cosine. ++ #[inline] ++ pub fn cos(self) -> Self { ++ use crate::codegen::math::float::cos::Cos; ++ Cos::cos(self) ++ } ++ ++ /// Cosine of `self * PI`. ++ #[inline] ++ pub fn cos_pi(self) -> Self { ++ use crate::codegen::math::float::cos_pi::CosPi; ++ CosPi::cos_pi(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_cos>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn cos() { ++ use crate::$elem_ty::consts::PI; ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let p = $id::splat(PI as $elem_ty); ++ let ph = $id::splat(PI as $elem_ty / 2.); ++ let z_r = $id::splat((PI as $elem_ty / 2.).cos()); ++ let o_r = $id::splat((PI as $elem_ty).cos()); ++ ++ assert_eq!(o, z.cos()); ++ assert_eq!(z_r, ph.cos()); ++ assert_eq!(o_r, p.cos()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/exp.rs b/third_party/rust/packed_simd/src/api/math/float/exp.rs +new file mode 100644 +index 000000000000..e3356d853a83 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/exp.rs +@@ -0,0 +1,33 @@ ++//! Implements vertical (lane-wise) floating-point `exp`. ++ ++macro_rules! impl_math_float_exp { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Returns the exponential function of `self`: `e^(self)`. ++ #[inline] ++ pub fn exp(self) -> Self { ++ use crate::codegen::math::float::exp::Exp; ++ Exp::exp(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _math_exp>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn exp() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ assert_eq!(o, z.exp()); ++ ++ let e = $id::splat(crate::f64::consts::E as $elem_ty); ++ let tol = $id::splat(2.4e-4 as $elem_ty); ++ assert!((e - o.exp()).abs().le(tol).all()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/ln.rs b/third_party/rust/packed_simd/src/api/math/float/ln.rs +new file mode 100644 +index 000000000000..5ceb9173ae05 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/ln.rs +@@ -0,0 +1,33 @@ ++//! Implements vertical (lane-wise) floating-point `ln`. ++ ++macro_rules! impl_math_float_ln { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Returns the natural logarithm of `self`. ++ #[inline] ++ pub fn ln(self) -> Self { ++ use crate::codegen::math::float::ln::Ln; ++ Ln::ln(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_ln>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ln() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ assert_eq!(z, o.ln()); ++ ++ let e = $id::splat(crate::f64::consts::E as $elem_ty); ++ let tol = $id::splat(2.4e-4 as $elem_ty); ++ assert!((o - e.ln()).abs().le(tol).all()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_add.rs b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs +new file mode 100644 +index 000000000000..4b170ee2b755 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs +@@ -0,0 +1,44 @@ ++//! Implements vertical (lane-wise) floating-point `mul_add`. ++ ++macro_rules! 
impl_math_float_mul_add { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Fused multiply add: `self * y + z` ++ #[inline] ++ pub fn mul_add(self, y: Self, z: Self) -> Self { ++ use crate::codegen::math::float::mul_add::MulAdd; ++ MulAdd::mul_add(self, y, z) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_mul_add>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn mul_add() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let t3 = $id::splat(3 as $elem_ty); ++ let f = $id::splat(4 as $elem_ty); ++ ++ assert_eq!(z, z.mul_add(z, z)); ++ assert_eq!(o, o.mul_add(o, z)); ++ assert_eq!(o, o.mul_add(z, o)); ++ assert_eq!(o, z.mul_add(o, o)); ++ ++ assert_eq!(t, o.mul_add(o, o)); ++ assert_eq!(t, o.mul_add(t, z)); ++ assert_eq!(t, t.mul_add(o, z)); ++ ++ assert_eq!(f, t.mul_add(t, z)); ++ assert_eq!(f, t.mul_add(o, t)); ++ assert_eq!(t3, t.mul_add(o, o)); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs +new file mode 100644 +index 000000000000..c5b27110f2d7 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs +@@ -0,0 +1,48 @@ ++//! Implements vertical (lane-wise) floating-point `mul_adde`. ++ ++macro_rules! impl_math_float_mul_adde { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Fused multiply add estimate: ~= `self * y + z` ++ /// ++ /// While fused multiply-add (`fma`) has infinite precision, ++ /// `mul_adde` has _at worst_ the same precision of a multiply followed by an add. ++ /// This might be more efficient on architectures that do not have an `fma` instruction. 
++ #[inline] ++ pub fn mul_adde(self, y: Self, z: Self) -> Self { ++ use crate::codegen::math::float::mul_adde::MulAddE; ++ MulAddE::mul_adde(self, y, z) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_mul_adde>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn mul_adde() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let t3 = $id::splat(3 as $elem_ty); ++ let f = $id::splat(4 as $elem_ty); ++ ++ assert_eq!(z, z.mul_adde(z, z)); ++ assert_eq!(o, o.mul_adde(o, z)); ++ assert_eq!(o, o.mul_adde(z, o)); ++ assert_eq!(o, z.mul_adde(o, o)); ++ ++ assert_eq!(t, o.mul_adde(o, o)); ++ assert_eq!(t, o.mul_adde(t, z)); ++ assert_eq!(t, t.mul_adde(o, z)); ++ ++ assert_eq!(f, t.mul_adde(t, z)); ++ assert_eq!(f, t.mul_adde(o, t)); ++ assert_eq!(t3, t.mul_adde(o, o)); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/powf.rs b/third_party/rust/packed_simd/src/api/math/float/powf.rs +new file mode 100644 +index 000000000000..83dc9ff9c05e +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/powf.rs +@@ -0,0 +1,36 @@ ++//! Implements vertical (lane-wise) floating-point `powf`. ++ ++macro_rules! impl_math_float_powf { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Raises `self` number to the floating point power of `x`. ++ #[inline] ++ pub fn powf(self, x: Self) -> Self { ++ use crate::codegen::math::float::powf::Powf; ++ Powf::powf(self, x) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _math_powf>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn powf() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ assert_eq!(o, o.powf(z)); ++ assert_eq!(o, t.powf(z)); ++ assert_eq!(o, o.powf(o)); ++ assert_eq!(t, t.powf(o)); ++ ++ let f = $id::splat(4 as $elem_ty); ++ assert_eq!(f, t.powf(t)); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/recpre.rs b/third_party/rust/packed_simd/src/api/math/float/recpre.rs +new file mode 100644 +index 000000000000..127f0b2ff674 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/recpre.rs +@@ -0,0 +1,36 @@ ++//! Implements vertical (lane-wise) floating-point `recpre`. ++ ++macro_rules! impl_math_float_recpre { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Reciprocal estimate: `~= 1. / self`. ++ /// ++ /// FIXME: The precision of the estimate is currently unspecified. ++ #[inline] ++ pub fn recpre(self) -> Self { ++ $id::splat(1.) / self ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _math_recpre>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn recpre() { ++ let tol = $id::splat(2.4e-4 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let error = (o - o.recpre()).abs(); ++ assert!(error.le(tol).all()); ++ ++ let t = $id::splat(2 as $elem_ty); ++ let e = 0.5; ++ let error = (e - t.recpre()).abs(); ++ assert!(error.le(tol).all()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs +new file mode 100644 +index 000000000000..c77977f7b1cd +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs +@@ -0,0 +1,40 @@ ++//! Implements vertical (lane-wise) floating-point `rsqrte`. ++ ++macro_rules! impl_math_float_rsqrte { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Reciprocal square-root estimate: `~= 1. / self.sqrt()`. ++ /// ++ /// FIXME: The precision of the estimate is currently unspecified. ++ #[inline] ++ pub fn rsqrte(self) -> Self { ++ unsafe { ++ use crate::llvm::simd_fsqrt; ++ $id::splat(1.) / Simd(simd_fsqrt(self.0)) ++ } ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_rsqrte>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn rsqrte() { ++ use crate::$elem_ty::consts::SQRT_2; ++ let tol = $id::splat(2.4e-4 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let error = (o - o.rsqrte()).abs(); ++ assert!(error.le(tol).all()); ++ ++ let t = $id::splat(2 as $elem_ty); ++ let e = 1. 
/ SQRT_2; ++ let error = (e - t.rsqrte()).abs(); ++ assert!(error.le(tol).all()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/sin.rs b/third_party/rust/packed_simd/src/api/math/float/sin.rs +new file mode 100644 +index 000000000000..49908319b126 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/sin.rs +@@ -0,0 +1,50 @@ ++//! Implements vertical (lane-wise) floating-point `sin`. ++ ++macro_rules! impl_math_float_sin { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Sine. ++ #[inline] ++ pub fn sin(self) -> Self { ++ use crate::codegen::math::float::sin::Sin; ++ Sin::sin(self) ++ } ++ ++ /// Sine of `self * PI`. ++ #[inline] ++ pub fn sin_pi(self) -> Self { ++ use crate::codegen::math::float::sin_pi::SinPi; ++ SinPi::sin_pi(self) ++ } ++ ++ /// Sine and cosine of `self * PI`. ++ #[inline] ++ pub fn sin_cos_pi(self) -> (Self, Self) { ++ use crate::codegen::math::float::sin_cos_pi::SinCosPi; ++ SinCosPi::sin_cos_pi(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_sin>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn sin() { ++ use crate::$elem_ty::consts::PI; ++ let z = $id::splat(0 as $elem_ty); ++ let p = $id::splat(PI as $elem_ty); ++ let ph = $id::splat(PI as $elem_ty / 2.); ++ let o_r = $id::splat((PI as $elem_ty / 2.).sin()); ++ let z_r = $id::splat((PI as $elem_ty).sin()); ++ ++ assert_eq!(z, z.sin()); ++ assert_eq!(o_r, ph.sin()); ++ assert_eq!(z_r, p.sin()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrt.rs b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs +new file mode 100644 +index 000000000000..ae624122d0e2 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs +@@ -0,0 +1,35 @@ ++//! Implements vertical (lane-wise) floating-point `sqrt`. 
++ ++macro_rules! impl_math_float_sqrt { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ #[inline] ++ pub fn sqrt(self) -> Self { ++ use crate::codegen::math::float::sqrt::Sqrt; ++ Sqrt::sqrt(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_sqrt>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn sqrt() { ++ use crate::$elem_ty::consts::SQRT_2; ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ assert_eq!(z, z.sqrt()); ++ assert_eq!(o, o.sqrt()); ++ ++ let t = $id::splat(2 as $elem_ty); ++ let e = $id::splat(SQRT_2); ++ assert_eq!(e, t.sqrt()); ++ ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrte.rs b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs +new file mode 100644 +index 000000000000..f7ffad748d9c +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs +@@ -0,0 +1,44 @@ ++//! Implements vertical (lane-wise) floating-point `sqrte`. ++ ++macro_rules! impl_math_float_sqrte { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Square-root estimate. ++ /// ++ /// FIXME: The precision of the estimate is currently unspecified. ++ #[inline] ++ pub fn sqrte(self) -> Self { ++ use crate::codegen::math::float::sqrte::Sqrte; ++ Sqrte::sqrte(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _math_sqrte>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn sqrte() { ++ use crate::$elem_ty::consts::SQRT_2; ++ let tol = $id::splat(2.4e-4 as $elem_ty); ++ ++ let z = $id::splat(0 as $elem_ty); ++ let error = (z - z.sqrte()).abs(); ++ assert!(error.le(tol).all()); ++ ++ let o = $id::splat(1 as $elem_ty); ++ let error = (o - o.sqrte()).abs(); ++ assert!(error.le(tol).all()); ++ ++ let t = $id::splat(2 as $elem_ty); ++ let e = $id::splat(SQRT_2 as $elem_ty); ++ let error = (e - t.sqrte()).abs(); ++ ++ assert!(error.le(tol).all()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/minimal.rs b/third_party/rust/packed_simd/src/api/minimal.rs +new file mode 100644 +index 000000000000..840d9e32585d +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/minimal.rs +@@ -0,0 +1,6 @@ ++#[macro_use] ++mod iuf; ++#[macro_use] ++mod mask; ++#[macro_use] ++mod ptr; +diff --git a/third_party/rust/packed_simd/src/api/minimal/iuf.rs b/third_party/rust/packed_simd/src/api/minimal/iuf.rs +new file mode 100644 +index 000000000000..58ffabab994f +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/minimal/iuf.rs +@@ -0,0 +1,167 @@ ++//! Minimal API of signed integer, unsigned integer, and floating-point ++//! vectors. ++ ++macro_rules! impl_minimal_iuf { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident | ++ $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => { ++ ++ $(#[$doc])* ++ pub type $id = Simd<[$elem_ty; $elem_count]>; ++ ++ impl sealed::Simd for $id { ++ type Element = $elem_ty; ++ const LANES: usize = $elem_count; ++ type LanesType = [u32; $elem_count]; ++ } ++ ++ impl $id { ++ /// Creates a new instance with each vector elements initialized ++ /// with the provided values. 
++ #[inline] ++ #[allow(clippy::too_many_arguments)] ++ pub const fn new($($elem_name: $elem_ty),*) -> Self { ++ Simd(codegen::$id($($elem_name as $ielem_ty),*)) ++ } ++ ++ /// Returns the number of vector lanes. ++ #[inline] ++ pub const fn lanes() -> usize { ++ $elem_count ++ } ++ ++ /// Constructs a new instance with each element initialized to ++ /// `value`. ++ #[inline] ++ pub const fn splat(value: $elem_ty) -> Self { ++ Simd(codegen::$id($({ ++ #[allow(non_camel_case_types, dead_code)] ++ struct $elem_name; ++ value as $ielem_ty ++ }),*)) ++ } ++ ++ /// Extracts the value at `index`. ++ /// ++ /// # Panics ++ /// ++ /// If `index >= Self::lanes()`. ++ #[inline] ++ pub fn extract(self, index: usize) -> $elem_ty { ++ assert!(index < $elem_count); ++ unsafe { self.extract_unchecked(index) } ++ } ++ ++ /// Extracts the value at `index`. ++ /// ++ /// # Precondition ++ /// ++ /// If `index >= Self::lanes()` the behavior is undefined. ++ #[inline] ++ pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty { ++ use crate::llvm::simd_extract; ++ let e: $ielem_ty = simd_extract(self.0, index as u32); ++ e as $elem_ty ++ } ++ ++ /// Returns a new vector where the value at `index` is replaced by `new_value`. ++ /// ++ /// # Panics ++ /// ++ /// If `index >= Self::lanes()`. ++ #[inline] ++ #[must_use = "replace does not modify the original value - \ ++ it returns a new vector with the value at `index` \ ++ replaced by `new_value`d" ++ ] ++ pub fn replace(self, index: usize, new_value: $elem_ty) -> Self { ++ assert!(index < $elem_count); ++ unsafe { self.replace_unchecked(index, new_value) } ++ } ++ ++ /// Returns a new vector where the value at `index` is replaced by `new_value`. ++ /// ++ /// # Precondition ++ /// ++ /// If `index >= Self::lanes()` the behavior is undefined. 
++ #[inline] ++ #[must_use = "replace_unchecked does not modify the original value - \ ++ it returns a new vector with the value at `index` \ ++ replaced by `new_value`d" ++ ] ++ pub unsafe fn replace_unchecked( ++ self, ++ index: usize, ++ new_value: $elem_ty, ++ ) -> Self { ++ use crate::llvm::simd_insert; ++ Simd(simd_insert(self.0, index as u32, new_value as $ielem_ty)) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _minimal>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn minimal() { ++ // lanes: ++ assert_eq!($elem_count, $id::lanes()); ++ ++ // splat and extract / extract_unchecked: ++ const VAL: $elem_ty = 7 as $elem_ty; ++ const VEC: $id = $id::splat(VAL); ++ for i in 0..$id::lanes() { ++ assert_eq!(VAL, VEC.extract(i)); ++ assert_eq!( ++ VAL, unsafe { VEC.extract_unchecked(i) } ++ ); ++ } ++ ++ // replace / replace_unchecked ++ let new_vec = VEC.replace(0, 42 as $elem_ty); ++ for i in 0..$id::lanes() { ++ if i == 0 { ++ assert_eq!(42 as $elem_ty, new_vec.extract(i)); ++ } else { ++ assert_eq!(VAL, new_vec.extract(i)); ++ } ++ } ++ let new_vec = unsafe { ++ VEC.replace_unchecked(0, 42 as $elem_ty) ++ }; ++ for i in 0..$id::lanes() { ++ if i == 0 { ++ assert_eq!(42 as $elem_ty, new_vec.extract(i)); ++ } else { ++ assert_eq!(VAL, new_vec.extract(i)); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn extract_panic_oob() { ++ const VAL: $elem_ty = 7 as $elem_ty; ++ const VEC: $id = $id::splat(VAL); ++ let _ = VEC.extract($id::lanes()); ++ } ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = 
"wasm32"))] ++ #[test] ++ #[should_panic] ++ fn replace_panic_oob() { ++ const VAL: $elem_ty = 7 as $elem_ty; ++ const VEC: $id = $id::splat(VAL); ++ let _ = VEC.replace($id::lanes(), 42 as $elem_ty); ++ } ++ } ++ } ++ } ++ } ++} +diff --git a/third_party/rust/packed_simd/src/api/minimal/mask.rs b/third_party/rust/packed_simd/src/api/minimal/mask.rs +new file mode 100644 +index 000000000000..e65be95db12c +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/minimal/mask.rs +@@ -0,0 +1,174 @@ ++//! Minimal API of mask vectors. ++ ++macro_rules! impl_minimal_mask { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident ++ | $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => { ++ $(#[$doc])* ++ pub type $id = Simd<[$elem_ty; $elem_count]>; ++ ++ impl sealed::Simd for $id { ++ type Element = $elem_ty; ++ const LANES: usize = $elem_count; ++ type LanesType = [u32; $elem_count]; ++ } ++ ++ impl $id { ++ /// Creates a new instance with each vector elements initialized ++ /// with the provided values. ++ #[inline] ++ #[allow(clippy::too_many_arguments)] ++ pub const fn new($($elem_name: bool),*) -> Self { ++ Simd(codegen::$id($(Self::bool_to_internal($elem_name)),*)) ++ } ++ ++ /// Converts a boolean type into the type of the vector lanes. ++ #[inline] ++ #[allow(clippy::indexing_slicing)] ++ const fn bool_to_internal(x: bool) -> $ielem_ty { ++ [0 as $ielem_ty, !(0 as $ielem_ty)][x as usize] ++ } ++ ++ /// Returns the number of vector lanes. ++ #[inline] ++ pub const fn lanes() -> usize { ++ $elem_count ++ } ++ ++ /// Constructs a new instance with each element initialized to ++ /// `value`. ++ #[inline] ++ pub const fn splat(value: bool) -> Self { ++ Simd(codegen::$id($({ ++ #[allow(non_camel_case_types, dead_code)] ++ struct $elem_name; ++ Self::bool_to_internal(value) ++ }),*)) ++ } ++ ++ /// Extracts the value at `index`. ++ /// ++ /// # Panics ++ /// ++ /// If `index >= Self::lanes()`. 
++ #[inline] ++ pub fn extract(self, index: usize) -> bool { ++ assert!(index < $elem_count); ++ unsafe { self.extract_unchecked(index) } ++ } ++ ++ /// Extracts the value at `index`. ++ /// ++ /// If `index >= Self::lanes()` the behavior is undefined. ++ #[inline] ++ pub unsafe fn extract_unchecked(self, index: usize) -> bool { ++ use crate::llvm::simd_extract; ++ let x: $ielem_ty = simd_extract(self.0, index as u32); ++ x != 0 ++ } ++ ++ /// Returns a new vector where the value at `index` is replaced by ++ /// `new_value`. ++ /// ++ /// # Panics ++ /// ++ /// If `index >= Self::lanes()`. ++ #[inline] ++ #[must_use = "replace does not modify the original value - \ ++ it returns a new vector with the value at `index` \ ++ replaced by `new_value`d" ++ ] ++ pub fn replace(self, index: usize, new_value: bool) -> Self { ++ assert!(index < $elem_count); ++ unsafe { self.replace_unchecked(index, new_value) } ++ } ++ ++ /// Returns a new vector where the value at `index` is replaced by ++ /// `new_value`. ++ /// ++ /// # Panics ++ /// ++ /// If `index >= Self::lanes()`. ++ #[inline] ++ #[must_use = "replace_unchecked does not modify the original value - \ ++ it returns a new vector with the value at `index` \ ++ replaced by `new_value`d" ++ ] ++ pub unsafe fn replace_unchecked( ++ self, ++ index: usize, ++ new_value: bool, ++ ) -> Self { ++ use crate::llvm::simd_insert; ++ Simd(simd_insert(self.0, index as u32, ++ Self::bool_to_internal(new_value))) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _minimal>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn minimal() { ++ // TODO: test new ++ ++ // lanes: ++ assert_eq!($elem_count, $id::lanes()); ++ ++ // splat and extract / extract_unchecked: ++ let vec = $id::splat(true); ++ for i in 0..$id::lanes() { ++ assert_eq!(true, vec.extract(i)); ++ assert_eq!(true, ++ unsafe { vec.extract_unchecked(i) } ++ ); ++ } ++ ++ // replace / replace_unchecked ++ let new_vec = vec.replace(0, false); ++ for i in 0..$id::lanes() { ++ if i == 0 { ++ assert_eq!(false, new_vec.extract(i)); ++ } else { ++ assert_eq!(true, new_vec.extract(i)); ++ } ++ } ++ let new_vec = unsafe { ++ vec.replace_unchecked(0, false) ++ }; ++ for i in 0..$id::lanes() { ++ if i == 0 { ++ assert_eq!(false, new_vec.extract(i)); ++ } else { ++ assert_eq!(true, new_vec.extract(i)); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn extract_panic_oob() { ++ let vec = $id::splat(false); ++ let _ = vec.extract($id::lanes()); ++ } ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn replace_panic_oob() { ++ let vec = $id::splat(false); ++ let _ = vec.replace($id::lanes(), true); ++ } ++ } ++ } ++ } ++ } ++} +diff --git a/third_party/rust/packed_simd/src/api/minimal/ptr.rs b/third_party/rust/packed_simd/src/api/minimal/ptr.rs +new file mode 100644 +index 000000000000..75e5aad5c065 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/minimal/ptr.rs +@@ -0,0 +1,1385 @@ ++//! Minimal API of pointer vectors. ++ ++macro_rules! 
impl_minimal_p { ++ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident, ++ $usize_ty:ident, $isize_ty:ident | $ref:ident | $test_tt:tt ++ | $($elem_name:ident),+ | ($true:expr, $false:expr) | ++ $(#[$doc:meta])*) => { ++ ++ $(#[$doc])* ++ pub type $id = Simd<[$elem_ty; $elem_count]>; ++ ++ impl sealed::Simd for $id { ++ type Element = $elem_ty; ++ const LANES: usize = $elem_count; ++ type LanesType = [u32; $elem_count]; ++ } ++ ++ impl $id { ++ /// Creates a new instance with each vector elements initialized ++ /// with the provided values. ++ #[inline] ++ #[allow(clippy::too_many_arguments)] ++ pub const fn new($($elem_name: $elem_ty),*) -> Self { ++ Simd(codegen::$id($($elem_name),*)) ++ } ++ ++ /// Returns the number of vector lanes. ++ #[inline] ++ pub const fn lanes() -> usize { ++ $elem_count ++ } ++ ++ /// Constructs a new instance with each element initialized to ++ /// `value`. ++ #[inline] ++ pub const fn splat(value: $elem_ty) -> Self { ++ Simd(codegen::$id($({ ++ #[allow(non_camel_case_types, dead_code)] ++ struct $elem_name; ++ value ++ }),*)) ++ } ++ ++ /// Constructs a new instance with each element initialized to ++ /// `null`. ++ #[inline] ++ pub const fn null() -> Self { ++ Self::splat(crate::ptr::null_mut() as $elem_ty) ++ } ++ ++ /// Returns a mask that selects those lanes that contain `null` ++ /// pointers. ++ #[inline] ++ pub fn is_null(self) -> $mask_ty { ++ self.eq(Self::null()) ++ } ++ ++ /// Extracts the value at `index`. ++ /// ++ /// # Panics ++ /// ++ /// If `index >= Self::lanes()`. ++ #[inline] ++ pub fn extract(self, index: usize) -> $elem_ty { ++ assert!(index < $elem_count); ++ unsafe { self.extract_unchecked(index) } ++ } ++ ++ /// Extracts the value at `index`. ++ /// ++ /// # Precondition ++ /// ++ /// If `index >= Self::lanes()` the behavior is undefined. 
++ #[inline] ++ pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty { ++ use crate::llvm::simd_extract; ++ simd_extract(self.0, index as u32) ++ } ++ ++ /// Returns a new vector where the value at `index` is replaced by ++ /// `new_value`. ++ /// ++ /// # Panics ++ /// ++ /// If `index >= Self::lanes()`. ++ #[inline] ++ #[must_use = "replace does not modify the original value - \ ++ it returns a new vector with the value at `index` \ ++ replaced by `new_value`d" ++ ] ++ #[allow(clippy::not_unsafe_ptr_arg_deref)] ++ pub fn replace(self, index: usize, new_value: $elem_ty) -> Self { ++ assert!(index < $elem_count); ++ unsafe { self.replace_unchecked(index, new_value) } ++ } ++ ++ /// Returns a new vector where the value at `index` is replaced by `new_value`. ++ /// ++ /// # Precondition ++ /// ++ /// If `index >= Self::lanes()` the behavior is undefined. ++ #[inline] ++ #[must_use = "replace_unchecked does not modify the original value - \ ++ it returns a new vector with the value at `index` \ ++ replaced by `new_value`d" ++ ] ++ pub unsafe fn replace_unchecked( ++ self, ++ index: usize, ++ new_value: $elem_ty, ++ ) -> Self { ++ use crate::llvm::simd_insert; ++ Simd(simd_insert(self.0, index as u32, new_value)) ++ } ++ } ++ ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _minimal>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn minimal() { ++ // lanes: ++ assert_eq!($elem_count, $id::::lanes()); ++ ++ // splat and extract / extract_unchecked: ++ let VAL7: <$id as sealed::Simd>::Element ++ = $ref!(7); ++ let VAL42: <$id as sealed::Simd>::Element ++ = $ref!(42); ++ let VEC: $id = $id::splat(VAL7); ++ for i in 0..$id::::lanes() { ++ assert_eq!(VAL7, VEC.extract(i)); ++ assert_eq!( ++ VAL7, unsafe { VEC.extract_unchecked(i) } ++ ); ++ } ++ ++ // replace / replace_unchecked ++ let new_vec = VEC.replace(0, VAL42); ++ for i in 0..$id::::lanes() { ++ if i == 0 { ++ assert_eq!(VAL42, new_vec.extract(i)); ++ } else { ++ assert_eq!(VAL7, new_vec.extract(i)); ++ } ++ } ++ let new_vec = unsafe { ++ VEC.replace_unchecked(0, VAL42) ++ }; ++ for i in 0..$id::::lanes() { ++ if i == 0 { ++ assert_eq!(VAL42, new_vec.extract(i)); ++ } else { ++ assert_eq!(VAL7, new_vec.extract(i)); ++ } ++ } ++ ++ let mut n = $id::::null(); ++ assert_eq!( ++ n, ++ $id::::splat(unsafe { crate::mem::zeroed() }) ++ ); ++ assert!(n.is_null().all()); ++ n = n.replace( ++ 0, unsafe { crate::mem::transmute(1_isize) } ++ ); ++ assert!(!n.is_null().all()); ++ if $id::::lanes() > 1 { ++ assert!(n.is_null().any()); ++ } else { ++ assert!(!n.is_null().any()); ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn extract_panic_oob() { ++ let VAL: <$id as sealed::Simd>::Element ++ = $ref!(7); ++ let VEC: $id = $id::splat(VAL); ++ let _ = VEC.extract($id::::lanes()); ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ 
#[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn replace_panic_oob() { ++ let VAL: <$id as sealed::Simd>::Element ++ = $ref!(7); ++ let VAL42: <$id as sealed::Simd>::Element ++ = $ref!(42); ++ let VEC: $id = $id::splat(VAL); ++ let _ = VEC.replace($id::::lanes(), VAL42); ++ } ++ } ++ } ++ } ++ ++ impl crate::fmt::Debug for $id { ++ #[allow(clippy::missing_inline_in_public_items)] ++ fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) ++ -> crate::fmt::Result { ++ write!( ++ f, ++ "{}<{}>(", ++ stringify!($id), ++ unsafe { crate::intrinsics::type_name::() } ++ )?; ++ for i in 0..$elem_count { ++ if i > 0 { ++ write!(f, ", ")?; ++ } ++ self.extract(i).fmt(f)?; ++ } ++ write!(f, ")") ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _fmt_debug>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn debug() { ++ use arrayvec::{ArrayString,ArrayVec}; ++ type TinyString = ArrayString<[u8; 512]>; ++ ++ use crate::fmt::Write; ++ let v = $id::::default(); ++ let mut s = TinyString::new(); ++ write!(&mut s, "{:?}", v).unwrap(); ++ ++ let mut beg = TinyString::new(); ++ write!(&mut beg, "{}(", stringify!($id)).unwrap(); ++ assert!( ++ s.starts_with(beg.as_str()), ++ "s = {} (should start with = {})", s, beg ++ ); ++ assert!(s.ends_with(")")); ++ let s: ArrayVec<[TinyString; 64]> ++ = s.replace(beg.as_str(), "") ++ .replace(")", "").split(",") ++ .map(|v| TinyString::from(v.trim()).unwrap()) ++ .collect(); ++ assert_eq!(s.len(), $id::::lanes()); ++ for (index, ss) in s.into_iter().enumerate() { ++ let mut e = TinyString::new(); ++ write!(&mut e, "{:?}", v.extract(index)).unwrap(); ++ assert_eq!(ss, e); ++ } ++ } ++ } ++ } ++ } ++ ++ impl Default for $id { ++ #[inline] ++ fn default() -> Self { ++ // FIXME: ptrs do not implement default ++ Self::null() ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _default>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn default() { ++ let a = $id::::default(); ++ for i in 0..$id::::lanes() { ++ assert_eq!( ++ a.extract(i), unsafe { crate::mem::zeroed() } ++ ); ++ } ++ } ++ } ++ } ++ } ++ ++ impl $id { ++ /// Lane-wise equality comparison. ++ #[inline] ++ pub fn eq(self, other: Self) -> $mask_ty { ++ unsafe { ++ use crate::llvm::simd_eq; ++ let a: $usize_ty = crate::mem::transmute(self); ++ let b: $usize_ty = crate::mem::transmute(other); ++ Simd(simd_eq(a.0, b.0)) ++ } ++ } ++ ++ /// Lane-wise inequality comparison. ++ #[inline] ++ pub fn ne(self, other: Self) -> $mask_ty { ++ unsafe { ++ use crate::llvm::simd_ne; ++ let a: $usize_ty = crate::mem::transmute(self); ++ let b: $usize_ty = crate::mem::transmute(other); ++ Simd(simd_ne(a.0, b.0)) ++ } ++ } ++ ++ /// Lane-wise less-than comparison. ++ #[inline] ++ pub fn lt(self, other: Self) -> $mask_ty { ++ unsafe { ++ use crate::llvm::simd_lt; ++ let a: $usize_ty = crate::mem::transmute(self); ++ let b: $usize_ty = crate::mem::transmute(other); ++ Simd(simd_lt(a.0, b.0)) ++ } ++ } ++ ++ /// Lane-wise less-than-or-equals comparison. ++ #[inline] ++ pub fn le(self, other: Self) -> $mask_ty { ++ unsafe { ++ use crate::llvm::simd_le; ++ let a: $usize_ty = crate::mem::transmute(self); ++ let b: $usize_ty = crate::mem::transmute(other); ++ Simd(simd_le(a.0, b.0)) ++ } ++ } ++ ++ /// Lane-wise greater-than comparison. ++ #[inline] ++ pub fn gt(self, other: Self) -> $mask_ty { ++ unsafe { ++ use crate::llvm::simd_gt; ++ let a: $usize_ty = crate::mem::transmute(self); ++ let b: $usize_ty = crate::mem::transmute(other); ++ Simd(simd_gt(a.0, b.0)) ++ } ++ } ++ ++ /// Lane-wise greater-than-or-equals comparison. 
++ #[inline] ++ pub fn ge(self, other: Self) -> $mask_ty { ++ unsafe { ++ use crate::llvm::simd_ge; ++ let a: $usize_ty = crate::mem::transmute(self); ++ let b: $usize_ty = crate::mem::transmute(other); ++ Simd(simd_ge(a.0, b.0)) ++ } ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _cmp_vertical>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn cmp() { ++ let a = $id::::null(); ++ let b = $id::::splat(unsafe { ++ crate::mem::transmute(1_isize) ++ }); ++ ++ let r = a.lt(b); ++ let e = $mask_ty::splat(true); ++ assert!(r == e); ++ let r = a.le(b); ++ assert!(r == e); ++ ++ let e = $mask_ty::splat(false); ++ let r = a.gt(b); ++ assert!(r == e); ++ let r = a.ge(b); ++ assert!(r == e); ++ let r = a.eq(b); ++ assert!(r == e); ++ ++ let mut a = a; ++ let mut b = b; ++ let mut e = e; ++ for i in 0..$id::::lanes() { ++ if i % 2 == 0 { ++ a = a.replace( ++ i, ++ unsafe { crate::mem::transmute(0_isize) } ++ ); ++ b = b.replace( ++ i, ++ unsafe { crate::mem::transmute(1_isize) } ++ ); ++ e = e.replace(i, true); ++ } else { ++ a = a.replace( ++ i, ++ unsafe { crate::mem::transmute(1_isize) } ++ ); ++ b = b.replace( ++ i, ++ unsafe { crate::mem::transmute(0_isize) } ++ ); ++ e = e.replace(i, false); ++ } ++ } ++ let r = a.lt(b); ++ assert!(r == e); ++ } ++ } ++ } ++ } ++ ++ #[allow(clippy::partialeq_ne_impl)] ++ impl crate::cmp::PartialEq<$id> for $id { ++ #[inline] ++ fn eq(&self, other: &Self) -> bool { ++ $id::::eq(*self, *other).all() ++ } ++ #[inline] ++ fn ne(&self, other: &Self) -> bool { ++ $id::::ne(*self, *other).any() ++ } ++ } ++ ++ // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 ++ #[allow(clippy::partialeq_ne_impl)] ++ impl crate::cmp::PartialEq>> ++ for LexicographicallyOrdered<$id> ++ { ++ #[inline] ++ fn eq(&self, other: &Self) -> bool { ++ self.0 == other.0 ++ } ++ #[inline] ++ fn ne(&self, other: &Self) -> bool { ++ self.0 != 
other.0 ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _cmp_PartialEq>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn partial_eq() { ++ let a = $id::::null(); ++ let b = $id::::splat(unsafe { ++ crate::mem::transmute(1_isize) ++ }); ++ ++ assert!(a != b); ++ assert!(!(a == b)); ++ assert!(a == a); ++ assert!(!(a != a)); ++ ++ if $id::::lanes() > 1 { ++ let a = $id::::null().replace(0, unsafe { ++ crate::mem::transmute(1_isize) ++ }); ++ let b = $id::::splat(unsafe { ++ crate::mem::transmute(1_isize) ++ }); ++ ++ assert!(a != b); ++ assert!(!(a == b)); ++ assert!(a == a); ++ assert!(!(a != a)); ++ } ++ } ++ } ++ } ++ } ++ ++ impl crate::cmp::Eq for $id {} ++ impl crate::cmp::Eq for LexicographicallyOrdered<$id> {} ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _cmp_eq>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn eq() { ++ fn foo(_: E) {} ++ let a = $id::::null(); ++ foo(a); ++ } ++ } ++ } ++ } ++ ++ impl From<[$elem_ty; $elem_count]> for $id { ++ #[inline] ++ fn from(array: [$elem_ty; $elem_count]) -> Self { ++ unsafe { ++ // FIXME: unnecessary zeroing; better than UB. ++ let mut u: Self = crate::mem::zeroed(); ++ crate::ptr::copy_nonoverlapping( ++ &array as *const [$elem_ty; $elem_count] as *const u8, ++ &mut u as *mut Self as *mut u8, ++ crate::mem::size_of::() ++ ); ++ u ++ } ++ } ++ } ++ impl Into<[$elem_ty; $elem_count]> for $id { ++ #[inline] ++ fn into(self) -> [$elem_ty; $elem_count] { ++ unsafe { ++ // FIXME: unnecessary zeroing; better than UB. ++ let mut u: [$elem_ty; $elem_count] = crate::mem::zeroed(); ++ crate::ptr::copy_nonoverlapping( ++ &self as *const $id as *const u8, ++ &mut u as *mut [$elem_ty; $elem_count] as *mut u8, ++ crate::mem::size_of::() ++ ); ++ u ++ } ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _from>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn array() { ++ let values = [1_i32; $elem_count]; ++ ++ let mut vec: $id = Default::default(); ++ let mut array = [ ++ $id::::null().extract(0); $elem_count ++ ]; ++ ++ for i in 0..$elem_count { ++ let ptr = unsafe { ++ crate::mem::transmute( ++ &values[i] as *const i32 ++ ) ++ }; ++ vec = vec.replace(i, ptr); ++ array[i] = ptr; ++ } ++ ++ // FIXME: there is no impl of From<$id> for [$elem_ty; N] ++ // let a0 = From::from(vec); ++ // assert_eq!(a0, array); ++ #[allow(unused_assignments)] ++ let mut a1 = array; ++ a1 = vec.into(); ++ assert_eq!(a1, array); ++ ++ let v0: $id = From::from(array); ++ assert_eq!(v0, vec); ++ let v1: $id = array.into(); ++ assert_eq!(v1, vec); ++ } ++ } ++ } ++ } ++ ++ impl $id { ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned ++ /// to an `align_of::()` boundary. ++ #[inline] ++ pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ let target_ptr = slice.get_unchecked(0) as *const $elem_ty; ++ assert!( ++ target_ptr.align_offset(crate::mem::align_of::()) ++ == 0 ++ ); ++ Self::from_slice_aligned_unchecked(slice) ++ } ++ } ++ ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()`. ++ #[inline] ++ pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ Self::from_slice_unaligned_unchecked(slice) ++ } ++ } ++ ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned ++ /// to an `align_of::()` boundary, the behavior is undefined. 
++ #[inline] ++ pub unsafe fn from_slice_aligned_unchecked(slice: &[$elem_ty]) ++ -> Self { ++ #[allow(clippy::cast_ptr_alignment)] ++ *(slice.get_unchecked(0) as *const $elem_ty as *const Self) ++ } ++ ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` the behavior is undefined. ++ #[inline] ++ pub unsafe fn from_slice_unaligned_unchecked( ++ slice: &[$elem_ty], ++ ) -> Self { ++ use crate::mem::size_of; ++ let target_ptr = ++ slice.get_unchecked(0) as *const $elem_ty as *const u8; ++ let mut x = Self::splat(crate::ptr::null_mut() as $elem_ty); ++ let self_ptr = &mut x as *mut Self as *mut u8; ++ crate::ptr::copy_nonoverlapping( ++ target_ptr, ++ self_ptr, ++ size_of::(), ++ ); ++ x ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _slice_from_slice>] { ++ use super::*; ++ use crate::iter::Iterator; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn from_slice_unaligned() { ++ let (null, non_null) = ptr_vals!($id); ++ ++ let mut unaligned = [ ++ non_null; $id::::lanes() + 1 ++ ]; ++ unaligned[0] = null; ++ let vec = $id::::from_slice_unaligned( ++ &unaligned[1..] ++ ); ++ for (index, &b) in unaligned.iter().enumerate() { ++ if index == 0 { ++ assert_eq!(b, null); ++ } else { ++ assert_eq!(b, non_null); ++ assert_eq!(b, vec.extract(index - 1)); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn from_slice_unaligned_fail() { ++ let (_null, non_null) = ptr_vals!($id); ++ let unaligned = [non_null; $id::::lanes() + 1]; ++ // the slice is not large enough => panic ++ let _vec = $id::::from_slice_unaligned( ++ &unaligned[2..] 
++ ); ++ } ++ ++ union A { ++ data: [<$id as sealed::Simd>::Element; ++ 2 * $id::::lanes()], ++ _vec: $id, ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn from_slice_aligned() { ++ let (null, non_null) = ptr_vals!($id); ++ let mut aligned = A { ++ data: [null; 2 * $id::::lanes()], ++ }; ++ for i in ++ $id::::lanes()..(2 * $id::::lanes()) { ++ unsafe { ++ aligned.data[i] = non_null; ++ } ++ } ++ ++ let vec = unsafe { ++ $id::::from_slice_aligned( ++ &aligned.data[$id::::lanes()..] ++ ) ++ }; ++ for (index, &b) in unsafe { ++ aligned.data.iter().enumerate() ++ } { ++ if index < $id::::lanes() { ++ assert_eq!(b, null); ++ } else { ++ assert_eq!(b, non_null); ++ assert_eq!( ++ b, vec.extract(index - $id::::lanes()) ++ ); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn from_slice_aligned_fail_lanes() { ++ let (_null, non_null) = ptr_vals!($id); ++ let aligned = A { ++ data: [non_null; 2 * $id::::lanes()], ++ }; ++ // the slice is not large enough => panic ++ let _vec = unsafe { ++ $id::::from_slice_aligned( ++ &aligned.data[2 * $id::::lanes()..] 
++ ) ++ }; ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn from_slice_aligned_fail_align() { ++ unsafe { ++ let (null, _non_null) = ptr_vals!($id); ++ let aligned = A { ++ data: [null; 2 * $id::::lanes()], ++ }; ++ ++ // get a pointer to the front of data ++ let ptr = aligned.data.as_ptr(); ++ // offset pointer by one element ++ let ptr = ptr.wrapping_add(1); ++ ++ if ptr.align_offset( ++ crate::mem::align_of::<$id>() ++ ) == 0 { ++ // the pointer is properly aligned, so ++ // from_slice_aligned won't fail here (e.g. this ++ // can happen for i128x1). So we panic to make ++ // the "should_fail" test pass: ++ panic!("ok"); ++ } ++ ++ // create a slice - this is safe, because the ++ // elements of the slice exist, are properly ++ // initialized, and properly aligned: ++ let s = slice::from_raw_parts( ++ ptr, $id::::lanes() ++ ); ++ // this should always panic because the slice ++ // alignment does not match the alignment ++ // requirements for the vector type: ++ let _vec = $id::::from_slice_aligned(s); ++ } ++ } ++ } ++ } ++ } ++ ++ impl $id { ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not ++ /// aligned to an `align_of::()` boundary. ++ #[inline] ++ pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ let target_ptr = ++ slice.get_unchecked_mut(0) as *mut $elem_ty; ++ assert!( ++ target_ptr.align_offset(crate::mem::align_of::()) ++ == 0 ++ ); ++ self.write_to_slice_aligned_unchecked(slice); ++ } ++ } ++ ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()`. 
++ #[inline] ++ pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ self.write_to_slice_unaligned_unchecked(slice); ++ } ++ } ++ ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not ++ /// aligned to an `align_of::()` boundary, the behavior is ++ /// undefined. ++ #[inline] ++ pub unsafe fn write_to_slice_aligned_unchecked( ++ self, slice: &mut [$elem_ty], ++ ) { ++ #[allow(clippy::cast_ptr_alignment)] ++ *(slice.get_unchecked_mut(0) as *mut $elem_ty as *mut Self) = ++ self; ++ } ++ ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` the behavior is undefined. ++ #[inline] ++ pub unsafe fn write_to_slice_unaligned_unchecked( ++ self, slice: &mut [$elem_ty], ++ ) { ++ let target_ptr = ++ slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; ++ let self_ptr = &self as *const Self as *const u8; ++ crate::ptr::copy_nonoverlapping( ++ self_ptr, ++ target_ptr, ++ crate::mem::size_of::(), ++ ); ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _slice_write_to_slice>] { ++ use super::*; ++ use crate::iter::Iterator; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn write_to_slice_unaligned() { ++ let (null, non_null) = ptr_vals!($id); ++ let mut unaligned = [null; $id::::lanes() + 1]; ++ let vec = $id::::splat(non_null); ++ vec.write_to_slice_unaligned(&mut unaligned[1..]); ++ for (index, &b) in unaligned.iter().enumerate() { ++ if index == 0 { ++ assert_eq!(b, null); ++ } else { ++ assert_eq!(b, non_null); ++ assert_eq!(b, vec.extract(index - 1)); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn write_to_slice_unaligned_fail() { ++ let (null, non_null) = ptr_vals!($id); ++ let mut unaligned = [null; $id::::lanes() + 1]; ++ let vec = $id::::splat(non_null); ++ // the slice is not large enough => panic ++ vec.write_to_slice_unaligned(&mut unaligned[2..]); ++ } ++ ++ union A { ++ data: [<$id as sealed::Simd>::Element; ++ 2 * $id::::lanes()], ++ _vec: $id, ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn write_to_slice_aligned() { ++ let (null, non_null) = ptr_vals!($id); ++ let mut aligned = A { ++ data: [null; 2 * $id::::lanes()], ++ }; ++ let vec = $id::::splat(non_null); ++ unsafe { ++ vec.write_to_slice_aligned( ++ &mut aligned.data[$id::::lanes()..] 
++ ) ++ }; ++ for (index, &b) in ++ unsafe { aligned.data.iter().enumerate() } { ++ if index < $id::::lanes() { ++ assert_eq!(b, null); ++ } else { ++ assert_eq!(b, non_null); ++ assert_eq!( ++ b, vec.extract(index - $id::::lanes()) ++ ); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn write_to_slice_aligned_fail_lanes() { ++ let (null, non_null) = ptr_vals!($id); ++ let mut aligned = A { ++ data: [null; 2 * $id::::lanes()], ++ }; ++ let vec = $id::::splat(non_null); ++ // the slice is not large enough => panic ++ unsafe { ++ vec.write_to_slice_aligned( ++ &mut aligned.data[2 * $id::::lanes()..] ++ ) ++ }; ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn write_to_slice_aligned_fail_align() { ++ let (null, non_null) = ptr_vals!($id); ++ unsafe { ++ let mut aligned = A { ++ data: [null; 2 * $id::::lanes()], ++ }; ++ ++ // get a pointer to the front of data ++ let ptr = aligned.data.as_mut_ptr(); ++ // offset pointer by one element ++ let ptr = ptr.wrapping_add(1); ++ ++ if ptr.align_offset( ++ crate::mem::align_of::<$id>() ++ ) == 0 { ++ // the pointer is properly aligned, so ++ // write_to_slice_aligned won't fail here (e.g. ++ // this can happen for i128x1). 
So we panic to ++ // make the "should_fail" test pass: ++ panic!("ok"); ++ } ++ ++ // create a slice - this is safe, because the ++ // elements of the slice exist, are properly ++ // initialized, and properly aligned: ++ let s = slice::from_raw_parts_mut( ++ ptr, $id::::lanes() ++ ); ++ // this should always panic because the slice ++ // alignment does not match the alignment ++ // requirements for the vector type: ++ let vec = $id::::splat(non_null); ++ vec.write_to_slice_aligned(s); ++ } ++ } ++ } ++ } ++ } ++ ++ impl crate::hash::Hash for $id { ++ #[inline] ++ fn hash(&self, state: &mut H) { ++ let s: $usize_ty = unsafe { crate::mem::transmute(*self) }; ++ s.hash(state) ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _hash>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn hash() { ++ use crate::hash::{Hash, Hasher}; ++ #[allow(deprecated)] ++ use crate::hash::{SipHasher13}; ++ ++ let values = [1_i32; $elem_count]; ++ ++ let mut vec: $id = Default::default(); ++ let mut array = [ ++ $id::::null().extract(0); ++ $elem_count ++ ]; ++ ++ for i in 0..$elem_count { ++ let ptr = unsafe { ++ crate::mem::transmute( ++ &values[i] as *const i32 ++ ) ++ }; ++ vec = vec.replace(i, ptr); ++ array[i] = ptr; ++ } ++ ++ #[allow(deprecated)] ++ let mut a_hash = SipHasher13::new(); ++ let mut v_hash = a_hash.clone(); ++ array.hash(&mut a_hash); ++ vec.hash(&mut v_hash); ++ assert_eq!(a_hash.finish(), v_hash.finish()); ++ } ++ } ++ } ++ } ++ ++ impl $id { ++ /// Calculates the offset from a pointer. ++ /// ++ /// `count` is in units of `T`; e.g. a count of `3` represents a ++ /// pointer offset of `3 * size_of::()` bytes. 
++ /// ++ /// # Safety ++ /// ++ /// If any of the following conditions are violated, the result is ++ /// Undefined Behavior: ++ /// ++ /// * Both the starting and resulting pointer must be either in ++ /// bounds or one byte past the end of an allocated object. ++ /// ++ /// * The computed offset, in bytes, cannot overflow an `isize`. ++ /// ++ /// * The offset being in bounds cannot rely on "wrapping around" ++ /// the address space. That is, the infinite-precision sum, in bytes ++ /// must fit in a `usize`. ++ /// ++ /// The compiler and standard library generally tries to ensure ++ /// allocations never reach a size where an offset is a concern. For ++ /// instance, `Vec` and `Box` ensure they never allocate more than ++ /// `isize::MAX` bytes, so `vec.as_ptr().offset(vec.len() as isize)` ++ /// is always safe. ++ /// ++ /// Most platforms fundamentally can't even construct such an ++ /// allocation. For instance, no known 64-bit platform can ever ++ /// serve a request for 263 bytes due to page-table limitations or ++ /// splitting the address space. However, some 32-bit and 16-bit ++ /// platforms may successfully serve a request for more than ++ /// `isize::MAX` bytes with things like Physical Address Extension. ++ /// As such, memory acquired directly from allocators or memory ++ /// mapped files may be too large to handle with this function. ++ /// ++ /// Consider using `wrapping_offset` instead if these constraints ++ /// are difficult to satisfy. The only advantage of this method is ++ /// that it enables more aggressive compiler optimizations. ++ #[inline] ++ pub unsafe fn offset(self, count: $isize_ty) -> Self { ++ // FIXME: should use LLVM's `add nsw nuw` ++ self.wrapping_offset(count) ++ } ++ ++ /// Calculates the offset from a pointer using wrapping arithmetic. ++ /// ++ /// `count` is in units of `T`; e.g. a count of `3` represents a ++ /// pointer offset of `3 * size_of::()` bytes. 
++ /// ++ /// # Safety ++ /// ++ /// The resulting pointer does not need to be in bounds, but it is ++ /// potentially hazardous to dereference (which requires unsafe). ++ /// ++ /// Always use `.offset(count)` instead when possible, because ++ /// offset allows the compiler to optimize better. ++ #[inline] ++ pub fn wrapping_offset(self, count: $isize_ty) -> Self { ++ unsafe { ++ let x: $isize_ty = crate::mem::transmute(self); ++ // note: {+,*} currently performs a `wrapping_{add, mul}` ++ crate::mem::transmute( ++ x + (count * crate::mem::size_of::() as isize) ++ ) ++ } ++ } ++ ++ /// Calculates the distance between two pointers. ++ /// ++ /// The returned value is in units of `T`: the distance in bytes is ++ /// divided by `mem::size_of::()`. ++ /// ++ /// This function is the inverse of offset. ++ /// ++ /// # Safety ++ /// ++ /// If any of the following conditions are violated, the result is ++ /// Undefined Behavior: ++ /// ++ /// * Both the starting and other pointer must be either in bounds ++ /// or one byte past the end of the same allocated object. ++ /// ++ /// * The distance between the pointers, in bytes, cannot overflow ++ /// an `isize`. ++ /// ++ /// * The distance between the pointers, in bytes, must be an exact ++ /// multiple of the size of `T`. ++ /// ++ /// * The distance being in bounds cannot rely on "wrapping around" ++ /// the address space. ++ /// ++ /// The compiler and standard library generally try to ensure ++ /// allocations never reach a size where an offset is a concern. For ++ /// instance, `Vec` and `Box` ensure they never allocate more than ++ /// `isize::MAX` bytes, so `ptr_into_vec.offset_from(vec.as_ptr())` ++ /// is always safe. ++ /// ++ /// Most platforms fundamentally can't even construct such an ++ /// allocation. For instance, no known 64-bit platform can ever ++ /// serve a request for 263 bytes due to page-table limitations or ++ /// splitting the address space. 
However, some 32-bit and 16-bit ++ /// platforms may successfully serve a request for more than ++ /// `isize::MAX` bytes with things like Physical Address Extension. ++ /// As such, memory acquired directly from allocators or memory ++ /// mapped files may be too large to handle with this function. ++ /// ++ /// Consider using wrapping_offset_from instead if these constraints ++ /// are difficult to satisfy. The only advantage of this method is ++ /// that it enables more aggressive compiler optimizations. ++ #[inline] ++ pub unsafe fn offset_from(self, origin: Self) -> $isize_ty { ++ // FIXME: should use LLVM's `sub nsw nuw`. ++ self.wrapping_offset_from(origin) ++ } ++ ++ /// Calculates the distance between two pointers. ++ /// ++ /// The returned value is in units of `T`: the distance in bytes is ++ /// divided by `mem::size_of::()`. ++ /// ++ /// If the address different between the two pointers is not a ++ /// multiple of `mem::size_of::()` then the result of the ++ /// division is rounded towards zero. ++ /// ++ /// Though this method is safe for any two pointers, note that its ++ /// result will be mostly useless if the two pointers aren't into ++ /// the same allocated object, for example if they point to two ++ /// different local variables. ++ #[inline] ++ pub fn wrapping_offset_from(self, origin: Self) -> $isize_ty { ++ let x: $isize_ty = unsafe { crate::mem::transmute(self) }; ++ let y: $isize_ty = unsafe { crate::mem::transmute(origin) }; ++ // note: {-,/} currently perform wrapping_{sub, div} ++ (y - x) / (crate::mem::size_of::() as isize) ++ } ++ ++ /// Calculates the offset from a pointer (convenience for ++ /// `.offset(count as isize)`). ++ /// ++ /// `count` is in units of `T`; e.g. a count of 3 represents a ++ /// pointer offset of `3 * size_of::()` bytes. 
++ /// ++ /// # Safety ++ /// ++ /// If any of the following conditions are violated, the result is ++ /// Undefined Behavior: ++ /// ++ /// * Both the starting and resulting pointer must be either in ++ /// bounds or one byte past the end of an allocated object. ++ /// ++ /// * The computed offset, in bytes, cannot overflow an `isize`. ++ /// ++ /// * The offset being in bounds cannot rely on "wrapping around" ++ /// the address space. That is, the infinite-precision sum must fit ++ /// in a `usize`. ++ /// ++ /// The compiler and standard library generally tries to ensure ++ /// allocations never reach a size where an offset is a concern. For ++ /// instance, `Vec` and `Box` ensure they never allocate more than ++ /// `isize::MAX` bytes, so `vec.as_ptr().add(vec.len())` is always ++ /// safe. ++ /// ++ /// Most platforms fundamentally can't even construct such an ++ /// allocation. For instance, no known 64-bit platform can ever ++ /// serve a request for 263 bytes due to page-table limitations or ++ /// splitting the address space. However, some 32-bit and 16-bit ++ /// platforms may successfully serve a request for more than ++ /// `isize::MAX` bytes with things like Physical Address Extension. ++ /// As such, memory acquired directly from allocators or memory ++ /// mapped files may be too large to handle with this function. ++ /// ++ /// Consider using `wrapping_offset` instead if these constraints ++ /// are difficult to satisfy. The only advantage of this method is ++ /// that it enables more aggressive compiler optimizations. ++ #[inline] ++ #[allow(clippy::should_implement_trait)] ++ pub unsafe fn add(self, count: $usize_ty) -> Self { ++ self.offset(count.cast()) ++ } ++ ++ /// Calculates the offset from a pointer (convenience for ++ /// `.offset((count as isize).wrapping_neg())`). ++ /// ++ /// `count` is in units of T; e.g. a `count` of 3 represents a ++ /// pointer offset of `3 * size_of::()` bytes. 
++ /// ++ /// # Safety ++ /// ++ /// If any of the following conditions are violated, the result is ++ /// Undefined Behavior: ++ /// ++ /// * Both the starting and resulting pointer must be either in ++ /// bounds or one byte past the end of an allocated object. ++ /// ++ /// * The computed offset cannot exceed `isize::MAX` **bytes**. ++ /// ++ /// * The offset being in bounds cannot rely on "wrapping around" ++ /// the address space. That is, the infinite-precision sum must fit ++ /// in a usize. ++ /// ++ /// The compiler and standard library generally tries to ensure ++ /// allocations never reach a size where an offset is a concern. For ++ /// instance, `Vec` and `Box` ensure they never allocate more than ++ /// `isize::MAX` bytes, so ++ /// `vec.as_ptr().add(vec.len()).sub(vec.len())` is always safe. ++ /// ++ /// Most platforms fundamentally can't even construct such an ++ /// allocation. For instance, no known 64-bit platform can ever ++ /// serve a request for 263 bytes due to page-table ++ /// limitations or splitting the address space. However, some 32-bit ++ /// and 16-bit platforms may successfully serve a request for more ++ /// than `isize::MAX` bytes with things like Physical Address ++ /// Extension. As such, memory acquired directly from allocators or ++ /// memory mapped files *may* be too large to handle with this ++ /// function. ++ /// ++ /// Consider using `wrapping_offset` instead if these constraints ++ /// are difficult to satisfy. The only advantage of this method is ++ /// that it enables more aggressive compiler optimizations. ++ #[inline] ++ #[allow(clippy::should_implement_trait)] ++ pub unsafe fn sub(self, count: $usize_ty) -> Self { ++ let x: $isize_ty = count.cast(); ++ // note: - is currently wrapping_neg ++ self.offset(-x) ++ } ++ ++ /// Calculates the offset from a pointer using wrapping arithmetic. ++ /// (convenience for `.wrapping_offset(count as isize)`) ++ /// ++ /// `count` is in units of T; e.g. 
a `count` of 3 represents a ++ /// pointer offset of `3 * size_of::()` bytes. ++ /// ++ /// # Safety ++ /// ++ /// The resulting pointer does not need to be in bounds, but it is ++ /// potentially hazardous to dereference (which requires `unsafe`). ++ /// ++ /// Always use `.add(count)` instead when possible, because `add` ++ /// allows the compiler to optimize better. ++ #[inline] ++ pub fn wrapping_add(self, count: $usize_ty) -> Self { ++ self.wrapping_offset(count.cast()) ++ } ++ ++ /// Calculates the offset from a pointer using wrapping arithmetic. ++ /// (convenience for `.wrapping_offset((count as ++ /// isize).wrapping_sub())`) ++ /// ++ /// `count` is in units of T; e.g. a `count` of 3 represents a ++ /// pointer offset of `3 * size_of::()` bytes. ++ /// ++ /// # Safety ++ /// ++ /// The resulting pointer does not need to be in bounds, but it is ++ /// potentially hazardous to dereference (which requires `unsafe`). ++ /// ++ /// Always use `.sub(count)` instead when possible, because `sub` ++ /// allows the compiler to optimize better. ++ #[inline] ++ pub fn wrapping_sub(self, count: $usize_ty) -> Self { ++ let x: $isize_ty = count.cast(); ++ self.wrapping_offset(-1 * x) ++ } ++ } ++ ++ impl $id { ++ /// Shuffle vector elements according to `indices`. ++ #[inline] ++ pub fn shuffle1_dyn(self, indices: I) -> Self ++ where ++ Self: codegen::shuffle1_dyn::Shuffle1Dyn, ++ { ++ codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices) ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _shuffle1_dyn>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn shuffle1_dyn() { ++ let (null, non_null) = ptr_vals!($id); ++ ++ // alternating = [non_null, null, non_null, null, ...] 
++ let mut alternating = $id::::splat(null); ++ for i in 0..$id::::lanes() { ++ if i % 2 == 0 { ++ alternating = alternating.replace(i, non_null); ++ } ++ } ++ ++ type Indices = <$id ++ as codegen::shuffle1_dyn::Shuffle1Dyn>::Indices; ++ // even = [0, 0, 2, 2, 4, 4, ..] ++ let even = { ++ let mut v = Indices::splat(0); ++ for i in 0..$id::::lanes() { ++ if i % 2 == 0 { ++ v = v.replace(i, (i as u8).into()); ++ } else { ++ v = v.replace(i, (i as u8 - 1).into()); ++ } ++ } ++ v ++ }; ++ // odd = [1, 1, 3, 3, 5, 5, ...] ++ let odd = { ++ let mut v = Indices::splat(0); ++ for i in 0..$id::::lanes() { ++ if i % 2 != 0 { ++ v = v.replace(i, (i as u8).into()); ++ } else { ++ v = v.replace(i, (i as u8 + 1).into()); ++ } ++ } ++ v ++ }; ++ ++ assert_eq!( ++ alternating.shuffle1_dyn(even), ++ $id::::splat(non_null) ++ ); ++ if $id::::lanes() > 1 { ++ assert_eq!( ++ alternating.shuffle1_dyn(odd), ++ $id::::splat(null) ++ ); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops.rs b/third_party/rust/packed_simd/src/api/ops.rs +new file mode 100644 +index 000000000000..f71c98795da3 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops.rs +@@ -0,0 +1,32 @@ ++//! 
Implementation of the `ops` traits ++#[macro_use] ++mod vector_mask_bitwise; ++#[macro_use] ++mod scalar_mask_bitwise; ++ ++#[macro_use] ++mod vector_arithmetic; ++#[macro_use] ++mod scalar_arithmetic; ++ ++#[macro_use] ++mod vector_bitwise; ++#[macro_use] ++mod scalar_bitwise; ++ ++#[macro_use] ++mod vector_shifts; ++#[macro_use] ++mod scalar_shifts; ++ ++#[macro_use] ++mod vector_rotates; ++ ++#[macro_use] ++mod vector_neg; ++ ++#[macro_use] ++mod vector_int_min_max; ++ ++#[macro_use] ++mod vector_float_min_max; +diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs +new file mode 100644 +index 000000000000..da1a2037eaaf +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs +@@ -0,0 +1,203 @@ ++//! Vertical (lane-wise) vector-scalar / scalar-vector arithmetic operations. ++ ++macro_rules! impl_ops_scalar_arithmetic { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::ops::Add<$elem_ty> for $id { ++ type Output = Self; ++ #[inline] ++ fn add(self, other: $elem_ty) -> Self { ++ self + $id::splat(other) ++ } ++ } ++ impl crate::ops::Add<$id> for $elem_ty { ++ type Output = $id; ++ #[inline] ++ fn add(self, other: $id) -> $id { ++ $id::splat(self) + other ++ } ++ } ++ ++ impl crate::ops::Sub<$elem_ty> for $id { ++ type Output = Self; ++ #[inline] ++ fn sub(self, other: $elem_ty) -> Self { ++ self - $id::splat(other) ++ } ++ } ++ impl crate::ops::Sub<$id> for $elem_ty { ++ type Output = $id; ++ #[inline] ++ fn sub(self, other: $id) -> $id { ++ $id::splat(self) - other ++ } ++ } ++ ++ impl crate::ops::Mul<$elem_ty> for $id { ++ type Output = Self; ++ #[inline] ++ fn mul(self, other: $elem_ty) -> Self { ++ self * $id::splat(other) ++ } ++ } ++ impl crate::ops::Mul<$id> for $elem_ty { ++ type Output = $id; ++ #[inline] ++ fn mul(self, other: $id) -> $id { ++ $id::splat(self) * other ++ } ++ } ++ ++ impl 
crate::ops::Div<$elem_ty> for $id { ++ type Output = Self; ++ #[inline] ++ fn div(self, other: $elem_ty) -> Self { ++ self / $id::splat(other) ++ } ++ } ++ impl crate::ops::Div<$id> for $elem_ty { ++ type Output = $id; ++ #[inline] ++ fn div(self, other: $id) -> $id { ++ $id::splat(self) / other ++ } ++ } ++ ++ impl crate::ops::Rem<$elem_ty> for $id { ++ type Output = Self; ++ #[inline] ++ fn rem(self, other: $elem_ty) -> Self { ++ self % $id::splat(other) ++ } ++ } ++ impl crate::ops::Rem<$id> for $elem_ty { ++ type Output = $id; ++ #[inline] ++ fn rem(self, other: $id) -> $id { ++ $id::splat(self) % other ++ } ++ } ++ ++ impl crate::ops::AddAssign<$elem_ty> for $id { ++ #[inline] ++ fn add_assign(&mut self, other: $elem_ty) { ++ *self = *self + other; ++ } ++ } ++ ++ impl crate::ops::SubAssign<$elem_ty> for $id { ++ #[inline] ++ fn sub_assign(&mut self, other: $elem_ty) { ++ *self = *self - other; ++ } ++ } ++ ++ impl crate::ops::MulAssign<$elem_ty> for $id { ++ #[inline] ++ fn mul_assign(&mut self, other: $elem_ty) { ++ *self = *self * other; ++ } ++ } ++ ++ impl crate::ops::DivAssign<$elem_ty> for $id { ++ #[inline] ++ fn div_assign(&mut self, other: $elem_ty) { ++ *self = *self / other; ++ } ++ } ++ ++ impl crate::ops::RemAssign<$elem_ty> for $id { ++ #[inline] ++ fn rem_assign(&mut self, other: $elem_ty) { ++ *self = *self % other; ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _ops_scalar_arith>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ops_scalar_arithmetic() { ++ let zi = 0 as $elem_ty; ++ let oi = 1 as $elem_ty; ++ let ti = 2 as $elem_ty; ++ let fi = 4 as $elem_ty; ++ let z = $id::splat(zi); ++ let o = $id::splat(oi); ++ let t = $id::splat(ti); ++ let f = $id::splat(fi); ++ ++ // add ++ assert_eq!(zi + z, z); ++ assert_eq!(z + zi, z); ++ assert_eq!(oi + z, o); ++ assert_eq!(o + zi, o); ++ assert_eq!(ti + z, t); ++ assert_eq!(t + zi, t); ++ assert_eq!(ti + t, f); ++ assert_eq!(t + ti, f); ++ // sub ++ assert_eq!(zi - z, z); ++ assert_eq!(z - zi, z); ++ assert_eq!(oi - z, o); ++ assert_eq!(o - zi, o); ++ assert_eq!(ti - z, t); ++ assert_eq!(t - zi, t); ++ assert_eq!(fi - t, t); ++ assert_eq!(f - ti, t); ++ assert_eq!(f - o - o, t); ++ assert_eq!(f - oi - oi, t); ++ // mul ++ assert_eq!(zi * z, z); ++ assert_eq!(z * zi, z); ++ assert_eq!(zi * o, z); ++ assert_eq!(z * oi, z); ++ assert_eq!(zi * t, z); ++ assert_eq!(z * ti, z); ++ assert_eq!(oi * t, t); ++ assert_eq!(o * ti, t); ++ assert_eq!(ti * t, f); ++ assert_eq!(t * ti, f); ++ // div ++ assert_eq!(zi / o, z); ++ assert_eq!(z / oi, z); ++ assert_eq!(ti / o, t); ++ assert_eq!(t / oi, t); ++ assert_eq!(fi / o, f); ++ assert_eq!(f / oi, f); ++ assert_eq!(ti / t, o); ++ assert_eq!(t / ti, o); ++ assert_eq!(fi / t, t); ++ assert_eq!(f / ti, t); ++ // rem ++ assert_eq!(oi % o, z); ++ assert_eq!(o % oi, z); ++ assert_eq!(fi % t, z); ++ assert_eq!(f % ti, z); ++ ++ { ++ let mut v = z; ++ assert_eq!(v, z); ++ v += oi; // add_assign ++ assert_eq!(v, o); ++ v -= oi; // sub_assign ++ assert_eq!(v, z); ++ v = t; ++ v *= oi; // mul_assign ++ assert_eq!(v, t); ++ v *= ti; ++ assert_eq!(v, f); ++ v /= oi; // div_assign ++ assert_eq!(v, f); ++ v /= ti; ++ assert_eq!(v, t); ++ v %= ti; // rem_assign ++ assert_eq!(v, z); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git 
a/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs +new file mode 100644 +index 000000000000..88216769aec4 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs +@@ -0,0 +1,162 @@ ++//! Vertical (lane-wise) vector-scalar / scalar-vector bitwise operations. ++ ++macro_rules! impl_ops_scalar_bitwise { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident | $test_tt:tt | ++ ($true:expr, $false:expr) ++ ) => { ++ impl crate::ops::BitXor<$elem_ty> for $id { ++ type Output = Self; ++ #[inline] ++ fn bitxor(self, other: $elem_ty) -> Self { ++ self ^ $id::splat(other) ++ } ++ } ++ impl crate::ops::BitXor<$id> for $elem_ty { ++ type Output = $id; ++ #[inline] ++ fn bitxor(self, other: $id) -> $id { ++ $id::splat(self) ^ other ++ } ++ } ++ ++ impl crate::ops::BitAnd<$elem_ty> for $id { ++ type Output = Self; ++ #[inline] ++ fn bitand(self, other: $elem_ty) -> Self { ++ self & $id::splat(other) ++ } ++ } ++ impl crate::ops::BitAnd<$id> for $elem_ty { ++ type Output = $id; ++ #[inline] ++ fn bitand(self, other: $id) -> $id { ++ $id::splat(self) & other ++ } ++ } ++ ++ impl crate::ops::BitOr<$elem_ty> for $id { ++ type Output = Self; ++ #[inline] ++ fn bitor(self, other: $elem_ty) -> Self { ++ self | $id::splat(other) ++ } ++ } ++ impl crate::ops::BitOr<$id> for $elem_ty { ++ type Output = $id; ++ #[inline] ++ fn bitor(self, other: $id) -> $id { ++ $id::splat(self) | other ++ } ++ } ++ ++ impl crate::ops::BitAndAssign<$elem_ty> for $id { ++ #[inline] ++ fn bitand_assign(&mut self, other: $elem_ty) { ++ *self = *self & other; ++ } ++ } ++ impl crate::ops::BitOrAssign<$elem_ty> for $id { ++ #[inline] ++ fn bitor_assign(&mut self, other: $elem_ty) { ++ *self = *self | other; ++ } ++ } ++ impl crate::ops::BitXorAssign<$elem_ty> for $id { ++ #[inline] ++ fn bitxor_assign(&mut self, other: $elem_ty) { ++ *self = *self ^ other; ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _ops_scalar_bitwise>] { ++ use super::*; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ops_scalar_bitwise() { ++ let zi = 0 as $elem_ty; ++ let oi = 1 as $elem_ty; ++ let ti = 2 as $elem_ty; ++ let z = $id::splat(zi); ++ let o = $id::splat(oi); ++ let t = $id::splat(ti); ++ ++ // BitAnd: ++ assert_eq!(oi & o, o); ++ assert_eq!(o & oi, o); ++ assert_eq!(oi & z, z); ++ assert_eq!(o & zi, z); ++ assert_eq!(zi & o, z); ++ assert_eq!(z & oi, z); ++ assert_eq!(zi & z, z); ++ assert_eq!(z & zi, z); ++ ++ assert_eq!(ti & t, t); ++ assert_eq!(t & ti, t); ++ assert_eq!(ti & o, z); ++ assert_eq!(t & oi, z); ++ assert_eq!(oi & t, z); ++ assert_eq!(o & ti, z); ++ ++ // BitOr: ++ assert_eq!(oi | o, o); ++ assert_eq!(o | oi, o); ++ assert_eq!(oi | z, o); ++ assert_eq!(o | zi, o); ++ assert_eq!(zi | o, o); ++ assert_eq!(z | oi, o); ++ assert_eq!(zi | z, z); ++ assert_eq!(z | zi, z); ++ ++ assert_eq!(ti | t, t); ++ assert_eq!(t | ti, t); ++ assert_eq!(zi | t, t); ++ assert_eq!(z | ti, t); ++ assert_eq!(ti | z, t); ++ assert_eq!(t | zi, t); ++ ++ // BitXOR: ++ assert_eq!(oi ^ o, z); ++ assert_eq!(o ^ oi, z); ++ assert_eq!(zi ^ z, z); ++ assert_eq!(z ^ zi, z); ++ assert_eq!(zi ^ o, o); ++ assert_eq!(z ^ oi, o); ++ assert_eq!(oi ^ z, o); ++ assert_eq!(o ^ zi, o); ++ ++ assert_eq!(ti ^ t, z); ++ assert_eq!(t ^ ti, z); ++ assert_eq!(ti ^ z, t); ++ assert_eq!(t ^ zi, t); ++ assert_eq!(zi ^ t, t); ++ assert_eq!(z ^ ti, t); ++ ++ { ++ // AndAssign: ++ let mut v = o; ++ v &= ti; ++ assert_eq!(v, z); ++ } ++ { ++ // OrAssign: ++ let mut v = z; ++ v |= oi; ++ assert_eq!(v, o); ++ } ++ { ++ // XORAssign: ++ let mut v = z; ++ v ^= oi; ++ assert_eq!(v, o); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs +new file mode 100644 +index 000000000000..523a85207b6b +--- /dev/null ++++ 
b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs +@@ -0,0 +1,140 @@ ++//! Vertical (lane-wise) vector-vector bitwise operations. ++ ++macro_rules! impl_ops_scalar_mask_bitwise { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident | $test_tt:tt | ++ ($true:expr, $false:expr) ++ ) => { ++ impl crate::ops::BitXor for $id { ++ type Output = Self; ++ #[inline] ++ fn bitxor(self, other: bool) -> Self { ++ self ^ $id::splat(other) ++ } ++ } ++ impl crate::ops::BitXor<$id> for bool { ++ type Output = $id; ++ #[inline] ++ fn bitxor(self, other: $id) -> $id { ++ $id::splat(self) ^ other ++ } ++ } ++ ++ impl crate::ops::BitAnd for $id { ++ type Output = Self; ++ #[inline] ++ fn bitand(self, other: bool) -> Self { ++ self & $id::splat(other) ++ } ++ } ++ impl crate::ops::BitAnd<$id> for bool { ++ type Output = $id; ++ #[inline] ++ fn bitand(self, other: $id) -> $id { ++ $id::splat(self) & other ++ } ++ } ++ ++ impl crate::ops::BitOr for $id { ++ type Output = Self; ++ #[inline] ++ fn bitor(self, other: bool) -> Self { ++ self | $id::splat(other) ++ } ++ } ++ impl crate::ops::BitOr<$id> for bool { ++ type Output = $id; ++ #[inline] ++ fn bitor(self, other: $id) -> $id { ++ $id::splat(self) | other ++ } ++ } ++ ++ impl crate::ops::BitAndAssign for $id { ++ #[inline] ++ fn bitand_assign(&mut self, other: bool) { ++ *self = *self & other; ++ } ++ } ++ impl crate::ops::BitOrAssign for $id { ++ #[inline] ++ fn bitor_assign(&mut self, other: bool) { ++ *self = *self | other; ++ } ++ } ++ impl crate::ops::BitXorAssign for $id { ++ #[inline] ++ fn bitxor_assign(&mut self, other: bool) { ++ *self = *self ^ other; ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _ops_scalar_mask_bitwise>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ops_scalar_mask_bitwise() { ++ let ti = true; ++ let fi = false; ++ let t = $id::splat(ti); ++ let f = $id::splat(fi); ++ assert!(t != f); ++ assert!(!(t == f)); ++ ++ // BitAnd: ++ assert_eq!(ti & f, f); ++ assert_eq!(t & fi, f); ++ assert_eq!(fi & t, f); ++ assert_eq!(f & ti, f); ++ assert_eq!(ti & t, t); ++ assert_eq!(t & ti, t); ++ assert_eq!(fi & f, f); ++ assert_eq!(f & fi, f); ++ ++ // BitOr: ++ assert_eq!(ti | f, t); ++ assert_eq!(t | fi, t); ++ assert_eq!(fi | t, t); ++ assert_eq!(f | ti, t); ++ assert_eq!(ti | t, t); ++ assert_eq!(t | ti, t); ++ assert_eq!(fi | f, f); ++ assert_eq!(f | fi, f); ++ ++ // BitXOR: ++ assert_eq!(ti ^ f, t); ++ assert_eq!(t ^ fi, t); ++ assert_eq!(fi ^ t, t); ++ assert_eq!(f ^ ti, t); ++ assert_eq!(ti ^ t, f); ++ assert_eq!(t ^ ti, f); ++ assert_eq!(fi ^ f, f); ++ assert_eq!(f ^ fi, f); ++ ++ { ++ // AndAssign: ++ let mut v = f; ++ v &= ti; ++ assert_eq!(v, f); ++ } ++ { ++ // OrAssign: ++ let mut v = f; ++ v |= ti; ++ assert_eq!(v, t); ++ } ++ { ++ // XORAssign: ++ let mut v = f; ++ v ^= ti; ++ assert_eq!(v, t); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs +new file mode 100644 +index 000000000000..9c164ad56c0b +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs +@@ -0,0 +1,107 @@ ++//! Vertical (lane-wise) vector-scalar shifts operations. ++ ++macro_rules! 
impl_ops_scalar_shifts { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::ops::Shl for $id { ++ type Output = Self; ++ #[inline] ++ fn shl(self, other: u32) -> Self { ++ self << $id::splat(other as $elem_ty) ++ } ++ } ++ impl crate::ops::Shr for $id { ++ type Output = Self; ++ #[inline] ++ fn shr(self, other: u32) -> Self { ++ self >> $id::splat(other as $elem_ty) ++ } ++ } ++ ++ impl crate::ops::ShlAssign for $id { ++ #[inline] ++ fn shl_assign(&mut self, other: u32) { ++ *self = *self << other; ++ } ++ } ++ impl crate::ops::ShrAssign for $id { ++ #[inline] ++ fn shr_assign(&mut self, other: u32) { ++ *self = *self >> other; ++ } ++ } ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _ops_scalar_shifts>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"), ++ allow(unreachable_code, ++ unused_variables, ++ unused_mut) ++ )] ++ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 ++ fn ops_scalar_shifts() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let f = $id::splat(4 as $elem_ty); ++ ++ { ++ let zi = 0 as u32; ++ let oi = 1 as u32; ++ let ti = 2 as u32; ++ let maxi ++ = (mem::size_of::<$elem_ty>() * 8 - 1) as u32; ++ ++ // shr ++ assert_eq!(z >> zi, z); ++ assert_eq!(z >> oi, z); ++ assert_eq!(z >> ti, z); ++ assert_eq!(z >> ti, z); ++ ++ #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/13 ++ return; ++ } ++ ++ assert_eq!(o >> zi, o); ++ assert_eq!(t >> zi, t); ++ assert_eq!(f >> zi, f); ++ assert_eq!(f >> maxi, z); ++ ++ assert_eq!(o >> oi, z); ++ assert_eq!(t >> oi, o); ++ assert_eq!(t >> ti, z); ++ assert_eq!(f >> oi, t); ++ assert_eq!(f >> ti, o); ++ assert_eq!(f >> maxi, z); ++ ++ // shl ++ assert_eq!(z << zi, z); 
++ assert_eq!(o << zi, o); ++ assert_eq!(t << zi, t); ++ assert_eq!(f << zi, f); ++ assert_eq!(f << maxi, z); ++ ++ assert_eq!(o << oi, t); ++ assert_eq!(o << ti, f); ++ assert_eq!(t << oi, f); ++ ++ { // shr_assign ++ let mut v = o; ++ v >>= oi; ++ assert_eq!(v, z); ++ } ++ { // shl_assign ++ let mut v = o; ++ v <<= oi; ++ assert_eq!(v, t); ++ } ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs +new file mode 100644 +index 000000000000..7057f52d0317 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs +@@ -0,0 +1,148 @@ ++//! Vertical (lane-wise) vector-vector arithmetic operations. ++ ++macro_rules! impl_ops_vector_arithmetic { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::ops::Add for $id { ++ type Output = Self; ++ #[inline] ++ fn add(self, other: Self) -> Self { ++ use crate::llvm::simd_add; ++ unsafe { Simd(simd_add(self.0, other.0)) } ++ } ++ } ++ ++ impl crate::ops::Sub for $id { ++ type Output = Self; ++ #[inline] ++ fn sub(self, other: Self) -> Self { ++ use crate::llvm::simd_sub; ++ unsafe { Simd(simd_sub(self.0, other.0)) } ++ } ++ } ++ ++ impl crate::ops::Mul for $id { ++ type Output = Self; ++ #[inline] ++ fn mul(self, other: Self) -> Self { ++ use crate::llvm::simd_mul; ++ unsafe { Simd(simd_mul(self.0, other.0)) } ++ } ++ } ++ ++ impl crate::ops::Div for $id { ++ type Output = Self; ++ #[inline] ++ fn div(self, other: Self) -> Self { ++ use crate::llvm::simd_div; ++ unsafe { Simd(simd_div(self.0, other.0)) } ++ } ++ } ++ ++ impl crate::ops::Rem for $id { ++ type Output = Self; ++ #[inline] ++ fn rem(self, other: Self) -> Self { ++ use crate::llvm::simd_rem; ++ unsafe { Simd(simd_rem(self.0, other.0)) } ++ } ++ } ++ ++ impl crate::ops::AddAssign for $id { ++ #[inline] ++ fn add_assign(&mut self, other: Self) { ++ *self = *self + other; ++ } ++ } ++ ++ impl 
crate::ops::SubAssign for $id { ++ #[inline] ++ fn sub_assign(&mut self, other: Self) { ++ *self = *self - other; ++ } ++ } ++ ++ impl crate::ops::MulAssign for $id { ++ #[inline] ++ fn mul_assign(&mut self, other: Self) { ++ *self = *self * other; ++ } ++ } ++ ++ impl crate::ops::DivAssign for $id { ++ #[inline] ++ fn div_assign(&mut self, other: Self) { ++ *self = *self / other; ++ } ++ } ++ ++ impl crate::ops::RemAssign for $id { ++ #[inline] ++ fn rem_assign(&mut self, other: Self) { ++ *self = *self % other; ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _ops_vector_arith>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ops_vector_arithmetic() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let f = $id::splat(4 as $elem_ty); ++ ++ // add ++ assert_eq!(z + z, z); ++ assert_eq!(o + z, o); ++ assert_eq!(t + z, t); ++ assert_eq!(t + t, f); ++ // sub ++ assert_eq!(z - z, z); ++ assert_eq!(o - z, o); ++ assert_eq!(t - z, t); ++ assert_eq!(f - t, t); ++ assert_eq!(f - o - o, t); ++ // mul ++ assert_eq!(z * z, z); ++ assert_eq!(z * o, z); ++ assert_eq!(z * t, z); ++ assert_eq!(o * t, t); ++ assert_eq!(t * t, f); ++ // div ++ assert_eq!(z / o, z); ++ assert_eq!(t / o, t); ++ assert_eq!(f / o, f); ++ assert_eq!(t / t, o); ++ assert_eq!(f / t, t); ++ // rem ++ assert_eq!(o % o, z); ++ assert_eq!(f % t, z); ++ ++ { ++ let mut v = z; ++ assert_eq!(v, z); ++ v += o; // add_assign ++ assert_eq!(v, o); ++ v -= o; // sub_assign ++ assert_eq!(v, z); ++ v = t; ++ v *= o; // mul_assign ++ assert_eq!(v, t); ++ v *= t; ++ assert_eq!(v, f); ++ v /= o; // div_assign ++ assert_eq!(v, f); ++ v /= t; ++ assert_eq!(v, t); ++ v %= t; // rem_assign ++ assert_eq!(v, z); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs 
b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs +new file mode 100644 +index 000000000000..7be9603fa261 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs +@@ -0,0 +1,129 @@ ++//! Vertical (lane-wise) vector-vector bitwise operations. ++ ++macro_rules! impl_ops_vector_bitwise { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident | $test_tt:tt | ++ ($true:expr, $false:expr) ++ ) => { ++ impl crate::ops::Not for $id { ++ type Output = Self; ++ #[inline] ++ fn not(self) -> Self { ++ Self::splat($true) ^ self ++ } ++ } ++ impl crate::ops::BitXor for $id { ++ type Output = Self; ++ #[inline] ++ fn bitxor(self, other: Self) -> Self { ++ use crate::llvm::simd_xor; ++ unsafe { Simd(simd_xor(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::BitAnd for $id { ++ type Output = Self; ++ #[inline] ++ fn bitand(self, other: Self) -> Self { ++ use crate::llvm::simd_and; ++ unsafe { Simd(simd_and(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::BitOr for $id { ++ type Output = Self; ++ #[inline] ++ fn bitor(self, other: Self) -> Self { ++ use crate::llvm::simd_or; ++ unsafe { Simd(simd_or(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::BitAndAssign for $id { ++ #[inline] ++ fn bitand_assign(&mut self, other: Self) { ++ *self = *self & other; ++ } ++ } ++ impl crate::ops::BitOrAssign for $id { ++ #[inline] ++ fn bitor_assign(&mut self, other: Self) { ++ *self = *self | other; ++ } ++ } ++ impl crate::ops::BitXorAssign for $id { ++ #[inline] ++ fn bitxor_assign(&mut self, other: Self) { ++ *self = *self ^ other; ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _ops_vector_bitwise>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ops_vector_bitwise() { ++ ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let m = $id::splat(!z.extract(0)); ++ ++ // Not: ++ assert_eq!(!z, m); ++ assert_eq!(!m, z); ++ ++ // BitAnd: ++ assert_eq!(o & o, o); ++ assert_eq!(o & z, z); ++ assert_eq!(z & o, z); ++ assert_eq!(z & z, z); ++ ++ assert_eq!(t & t, t); ++ assert_eq!(t & o, z); ++ assert_eq!(o & t, z); ++ ++ // BitOr: ++ assert_eq!(o | o, o); ++ assert_eq!(o | z, o); ++ assert_eq!(z | o, o); ++ assert_eq!(z | z, z); ++ ++ assert_eq!(t | t, t); ++ assert_eq!(z | t, t); ++ assert_eq!(t | z, t); ++ ++ // BitXOR: ++ assert_eq!(o ^ o, z); ++ assert_eq!(z ^ z, z); ++ assert_eq!(z ^ o, o); ++ assert_eq!(o ^ z, o); ++ ++ assert_eq!(t ^ t, z); ++ assert_eq!(t ^ z, t); ++ assert_eq!(z ^ t, t); ++ ++ { ++ // AndAssign: ++ let mut v = o; ++ v &= t; ++ assert_eq!(v, z); ++ } ++ { ++ // OrAssign: ++ let mut v = z; ++ v |= o; ++ assert_eq!(v, o); ++ } ++ { ++ // XORAssign: ++ let mut v = z; ++ v ^= o; ++ assert_eq!(v, o); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs +new file mode 100644 +index 000000000000..4126e87042f5 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs +@@ -0,0 +1,69 @@ ++//! Vertical (lane-wise) vector `min` and `max` for floating-point vectors. ++ ++macro_rules! impl_ops_vector_float_min_max { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Minimum of two vectors. ++ /// ++ /// Returns a new vector containing the minimum value of each of ++ /// the input vector lanes. 
++ #[inline] ++ pub fn min(self, x: Self) -> Self { ++ use crate::llvm::simd_fmin; ++ unsafe { Simd(simd_fmin(self.0, x.0)) } ++ } ++ ++ /// Maximum of two vectors. ++ /// ++ /// Returns a new vector containing the maximum value of each of ++ /// the input vector lanes. ++ #[inline] ++ pub fn max(self, x: Self) -> Self { ++ use crate::llvm::simd_fmax; ++ unsafe { Simd(simd_fmax(self.0, x.0)) } ++ } ++ } ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _ops_vector_min_max>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn min_max() { ++ let n = crate::$elem_ty::NAN; ++ let o = $id::splat(1. as $elem_ty); ++ let t = $id::splat(2. as $elem_ty); ++ ++ let mut m = o; // [1., 2., 1., 2., ...] ++ let mut on = o; ++ for i in 0..$id::lanes() { ++ if i % 2 == 0 { ++ m = m.replace(i, 2. as $elem_ty); ++ on = on.replace(i, n); ++ } ++ } ++ ++ assert_eq!(o.min(t), o); ++ assert_eq!(t.min(o), o); ++ assert_eq!(m.min(o), o); ++ assert_eq!(o.min(m), o); ++ assert_eq!(m.min(t), m); ++ assert_eq!(t.min(m), m); ++ ++ assert_eq!(o.max(t), t); ++ assert_eq!(t.max(o), t); ++ assert_eq!(m.max(o), m); ++ assert_eq!(o.max(m), m); ++ assert_eq!(m.max(t), t); ++ assert_eq!(t.max(m), t); ++ ++ assert_eq!(on.min(o), o); ++ assert_eq!(o.min(on), o); ++ assert_eq!(on.max(o), o); ++ assert_eq!(o.max(on), o); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs +new file mode 100644 +index 000000000000..36ea98e6bf32 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs +@@ -0,0 +1,57 @@ ++//! Vertical (lane-wise) vector `min` and `max` for integer vectors. ++ ++macro_rules! impl_ops_vector_int_min_max { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Minimum of two vectors. 
++ /// ++ /// Returns a new vector containing the minimum value of each of ++ /// the input vector lanes. ++ #[inline] ++ pub fn min(self, x: Self) -> Self { ++ self.lt(x).select(self, x) ++ } ++ ++ /// Maximum of two vectors. ++ /// ++ /// Returns a new vector containing the maximum value of each of ++ /// the input vector lanes. ++ #[inline] ++ pub fn max(self, x: Self) -> Self { ++ self.gt(x).select(self, x) ++ } ++ } ++ test_if!{$test_tt: ++ paste::item! { ++ pub mod [<$id _ops_vector_min_max>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn min_max() { ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ ++ let mut m = o; ++ for i in 0..$id::lanes() { ++ if i % 2 == 0 { ++ m = m.replace(i, 2 as $elem_ty); ++ } ++ } ++ assert_eq!(o.min(t), o); ++ assert_eq!(t.min(o), o); ++ assert_eq!(m.min(o), o); ++ assert_eq!(o.min(m), o); ++ assert_eq!(m.min(t), m); ++ assert_eq!(t.min(m), m); ++ ++ assert_eq!(o.max(t), t); ++ assert_eq!(t.max(o), t); ++ assert_eq!(m.max(o), m); ++ assert_eq!(o.max(m), m); ++ assert_eq!(m.max(t), t); ++ assert_eq!(t.max(m), t); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs +new file mode 100644 +index 000000000000..295fc1ca81c9 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs +@@ -0,0 +1,116 @@ ++//! Vertical (lane-wise) vector-vector bitwise operations. ++ ++macro_rules! 
impl_ops_vector_mask_bitwise { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident | $test_tt:tt | ++ ($true:expr, $false:expr) ++ ) => { ++ impl crate::ops::Not for $id { ++ type Output = Self; ++ #[inline] ++ fn not(self) -> Self { ++ Self::splat($true) ^ self ++ } ++ } ++ impl crate::ops::BitXor for $id { ++ type Output = Self; ++ #[inline] ++ fn bitxor(self, other: Self) -> Self { ++ use crate::llvm::simd_xor; ++ unsafe { Simd(simd_xor(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::BitAnd for $id { ++ type Output = Self; ++ #[inline] ++ fn bitand(self, other: Self) -> Self { ++ use crate::llvm::simd_and; ++ unsafe { Simd(simd_and(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::BitOr for $id { ++ type Output = Self; ++ #[inline] ++ fn bitor(self, other: Self) -> Self { ++ use crate::llvm::simd_or; ++ unsafe { Simd(simd_or(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::BitAndAssign for $id { ++ #[inline] ++ fn bitand_assign(&mut self, other: Self) { ++ *self = *self & other; ++ } ++ } ++ impl crate::ops::BitOrAssign for $id { ++ #[inline] ++ fn bitor_assign(&mut self, other: Self) { ++ *self = *self | other; ++ } ++ } ++ impl crate::ops::BitXorAssign for $id { ++ #[inline] ++ fn bitxor_assign(&mut self, other: Self) { ++ *self = *self ^ other; ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _ops_vector_mask_bitwise>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ops_vector_mask_bitwise() { ++ let t = $id::splat(true); ++ let f = $id::splat(false); ++ assert!(t != f); ++ assert!(!(t == f)); ++ ++ // Not: ++ assert_eq!(!t, f); ++ assert_eq!(t, !f); ++ ++ // BitAnd: ++ assert_eq!(t & f, f); ++ assert_eq!(f & t, f); ++ assert_eq!(t & t, t); ++ assert_eq!(f & f, f); ++ ++ // BitOr: ++ assert_eq!(t | f, t); ++ assert_eq!(f | t, t); ++ assert_eq!(t | t, t); ++ assert_eq!(f | f, f); ++ ++ // BitXOR: ++ assert_eq!(t ^ f, t); ++ assert_eq!(f ^ t, t); ++ assert_eq!(t ^ t, f); ++ assert_eq!(f ^ f, f); ++ ++ { ++ // AndAssign: ++ let mut v = f; ++ v &= t; ++ assert_eq!(v, f); ++ } ++ { ++ // OrAssign: ++ let mut v = f; ++ v |= t; ++ assert_eq!(v, t); ++ } ++ { ++ // XORAssign: ++ let mut v = f; ++ v ^= t; ++ assert_eq!(v, t); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_neg.rs b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs +new file mode 100644 +index 000000000000..e2d91fd2fed6 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs +@@ -0,0 +1,43 @@ ++//! Vertical (lane-wise) vector `Neg`. ++ ++macro_rules! impl_ops_vector_neg { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::ops::Neg for $id { ++ type Output = Self; ++ #[inline] ++ fn neg(self) -> Self { ++ Self::splat(-1 as $elem_ty) * self ++ } ++ } ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _ops_vector_neg>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn neg() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let f = $id::splat(4 as $elem_ty); ++ ++ let nz = $id::splat(-(0 as $elem_ty)); ++ let no = $id::splat(-(1 as $elem_ty)); ++ let nt = $id::splat(-(2 as $elem_ty)); ++ let nf = $id::splat(-(4 as $elem_ty)); ++ ++ assert_eq!(-z, nz); ++ assert_eq!(-o, no); ++ assert_eq!(-t, nt); ++ assert_eq!(-f, nf); ++ ++ assert_eq!(z, -nz); ++ assert_eq!(o, -no); ++ assert_eq!(t, -nt); ++ assert_eq!(f, -nf); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs +new file mode 100644 +index 000000000000..6c794ecf4b93 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs +@@ -0,0 +1,90 @@ ++//! Vertical (lane-wise) vector rotates operations. ++#![allow(unused)] ++ ++macro_rules! impl_ops_vector_rotates { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Shifts the bits of each lane to the left by the specified ++ /// amount in the corresponding lane of `n`, wrapping the ++ /// truncated bits to the end of the resulting integer. ++ /// ++ /// Note: this is neither the same operation as `<<` nor equivalent ++ /// to `slice::rotate_left`. ++ #[inline] ++ pub fn rotate_left(self, n: $id) -> $id { ++ const LANE_WIDTH: $elem_ty = ++ crate::mem::size_of::<$elem_ty>() as $elem_ty * 8; ++ // Protect against undefined behavior for over-long bit shifts ++ let n = n % LANE_WIDTH; ++ (self << n) | (self >> ((LANE_WIDTH - n) % LANE_WIDTH)) ++ } ++ ++ /// Shifts the bits of each lane to the right by the specified ++ /// amount in the corresponding lane of `n`, wrapping the ++ /// truncated bits to the beginning of the resulting integer. 
++ /// ++ /// Note: this is neither the same operation as `<<` nor equivalent ++ /// to `slice::rotate_left`. ++ #[inline] ++ pub fn rotate_right(self, n: $id) -> $id { ++ const LANE_WIDTH: $elem_ty = ++ crate::mem::size_of::<$elem_ty>() as $elem_ty * 8; ++ // Protect against undefined behavior for over-long bit shifts ++ let n = n % LANE_WIDTH; ++ (self >> n) | (self << ((LANE_WIDTH - n) % LANE_WIDTH)) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ // FIXME: ++ // https://github.com/rust-lang-nursery/packed_simd/issues/75 ++ #[cfg(not(any( ++ target_arch = "s390x", ++ target_arch = "sparc64", ++ )))] ++ pub mod [<$id _ops_vector_rotate>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn rotate_ops() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let f = $id::splat(4 as $elem_ty); ++ ++ let max = $id::splat( ++ (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty); ++ ++ // rotate_right ++ assert_eq!(z.rotate_right(z), z); ++ assert_eq!(z.rotate_right(o), z); ++ assert_eq!(z.rotate_right(t), z); ++ ++ assert_eq!(o.rotate_right(z), o); ++ assert_eq!(t.rotate_right(z), t); ++ assert_eq!(f.rotate_right(z), f); ++ assert_eq!(f.rotate_right(max), f << 1); ++ ++ assert_eq!(o.rotate_right(o), o << max); ++ assert_eq!(t.rotate_right(o), o); ++ assert_eq!(t.rotate_right(t), o << max); ++ assert_eq!(f.rotate_right(o), t); ++ assert_eq!(f.rotate_right(t), o); ++ ++ // rotate_left ++ assert_eq!(z.rotate_left(z), z); ++ assert_eq!(o.rotate_left(z), o); ++ assert_eq!(t.rotate_left(z), t); ++ assert_eq!(f.rotate_left(z), f); ++ assert_eq!(f.rotate_left(max), t); ++ ++ assert_eq!(o.rotate_left(o), t); ++ assert_eq!(o.rotate_left(t), f); ++ assert_eq!(t.rotate_left(o), f); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs 
b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs +new file mode 100644 +index 000000000000..22e1fbc0ec76 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs +@@ -0,0 +1,107 @@ ++//! Vertical (lane-wise) vector-vector shifts operations. ++ ++macro_rules! impl_ops_vector_shifts { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::ops::Shl<$id> for $id { ++ type Output = Self; ++ #[inline] ++ fn shl(self, other: Self) -> Self { ++ use crate::llvm::simd_shl; ++ unsafe { Simd(simd_shl(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::Shr<$id> for $id { ++ type Output = Self; ++ #[inline] ++ fn shr(self, other: Self) -> Self { ++ use crate::llvm::simd_shr; ++ unsafe { Simd(simd_shr(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::ShlAssign<$id> for $id { ++ #[inline] ++ fn shl_assign(&mut self, other: Self) { ++ *self = *self << other; ++ } ++ } ++ impl crate::ops::ShrAssign<$id> for $id { ++ #[inline] ++ fn shr_assign(&mut self, other: Self) { ++ *self = *self >> other; ++ } ++ } ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _ops_vector_shifts>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"), ++ allow(unreachable_code, ++ unused_variables, ++ unused_mut) ++ )] ++ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 ++ fn ops_vector_shifts() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let f = $id::splat(4 as $elem_ty); ++ ++ let max =$id::splat( ++ (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty ++ ); ++ ++ // shr ++ assert_eq!(z >> z, z); ++ assert_eq!(z >> o, z); ++ assert_eq!(z >> t, z); ++ assert_eq!(z >> t, z); ++ ++ #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] { ++ // FIXME: rust produces bad codegen for shifts: ++ // https://github.com/rust-lang-nursery/packed_simd/issues/13 ++ return; ++ } ++ ++ assert_eq!(o >> z, o); ++ assert_eq!(t >> z, t); ++ assert_eq!(f >> z, f); ++ assert_eq!(f >> max, z); ++ ++ assert_eq!(o >> o, z); ++ assert_eq!(t >> o, o); ++ assert_eq!(t >> t, z); ++ assert_eq!(f >> o, t); ++ assert_eq!(f >> t, o); ++ assert_eq!(f >> max, z); ++ ++ // shl ++ assert_eq!(z << z, z); ++ assert_eq!(o << z, o); ++ assert_eq!(t << z, t); ++ assert_eq!(f << z, f); ++ assert_eq!(f << max, z); ++ ++ assert_eq!(o << o, t); ++ assert_eq!(o << t, f); ++ assert_eq!(t << o, f); ++ ++ { ++ // shr_assign ++ let mut v = o; ++ v >>= o; ++ assert_eq!(v, z); ++ } ++ { ++ // shl_assign ++ let mut v = o; ++ v <<= o; ++ assert_eq!(v, t); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ptr.rs b/third_party/rust/packed_simd/src/api/ptr.rs +new file mode 100644 +index 000000000000..d2e523a49faf +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ptr.rs +@@ -0,0 +1,4 @@ ++//! 
Vector of pointers ++ ++#[macro_use] ++mod gather_scatter; +diff --git a/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs +new file mode 100644 +index 000000000000..9d8e113bb44f +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs +@@ -0,0 +1,241 @@ ++//! Implements masked gather and scatters for vectors of pointers ++ ++macro_rules! impl_ptr_read { ++ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident ++ | $test_tt:tt) => { ++ impl $id ++ where ++ [T; $elem_count]: sealed::SimdArray, ++ { ++ /// Reads selected vector elements from memory. ++ /// ++ /// Instantiates a new vector by reading the values from `self` for ++ /// those lanes whose `mask` is `true`, and using the elements of ++ /// `value` otherwise. ++ /// ++ /// No memory is accessed for those lanes of `self` whose `mask` is ++ /// `false`. ++ /// ++ /// # Safety ++ /// ++ /// This method is unsafe because it dereferences raw pointers. The ++ /// pointers must be aligned to `mem::align_of::()`. ++ #[inline] ++ pub unsafe fn read( ++ self, mask: Simd<[M; $elem_count]>, ++ value: Simd<[T; $elem_count]>, ++ ) -> Simd<[T; $elem_count]> ++ where ++ M: sealed::Mask, ++ [M; $elem_count]: sealed::SimdArray, ++ { ++ use crate::llvm::simd_gather; ++ Simd(simd_gather(value.0, self.0, mask.0)) ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! 
{ ++ mod [<$id _read>] { ++ use super::*; ++ #[test] ++ fn read() { ++ let mut v = [0_i32; $elem_count]; ++ for i in 0..$elem_count { ++ v[i] = i as i32; ++ } ++ ++ let mut ptr = $id::::null(); ++ ++ for i in 0..$elem_count { ++ ptr = ptr.replace(i, unsafe { ++ crate::mem::transmute(&v[i] as *const i32) ++ }); ++ } ++ ++ // all mask elements are true: ++ let mask = $mask_ty::splat(true); ++ let def = Simd::<[i32; $elem_count]>::splat(42_i32); ++ let r: Simd<[i32; $elem_count]> = unsafe { ++ ptr.read(mask, def) ++ }; ++ assert_eq!( ++ r, ++ Simd::<[i32; $elem_count]>::from_slice_unaligned( ++ &v ++ ) ++ ); ++ ++ let mut mask = mask; ++ for i in 0..$elem_count { ++ if i % 2 != 0 { ++ mask = mask.replace(i, false); ++ } ++ } ++ ++ // even mask elements are true, odd ones are false: ++ let r: Simd<[i32; $elem_count]> = unsafe { ++ ptr.read(mask, def) ++ }; ++ let mut e = v; ++ for i in 0..$elem_count { ++ if i % 2 != 0 { ++ e[i] = 42; ++ } ++ } ++ assert_eq!( ++ r, ++ Simd::<[i32; $elem_count]>::from_slice_unaligned( ++ &e ++ ) ++ ); ++ ++ // all mask elements are false: ++ let mask = $mask_ty::splat(false); ++ let def = Simd::<[i32; $elem_count]>::splat(42_i32); ++ let r: Simd<[i32; $elem_count]> = unsafe { ++ ptr.read(mask, def) } ++ ; ++ assert_eq!(r, def); ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! impl_ptr_write { ++ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident ++ | $test_tt:tt) => { ++ impl $id ++ where ++ [T; $elem_count]: sealed::SimdArray, ++ { ++ /// Writes selected vector elements to memory. ++ /// ++ /// Writes the lanes of `values` for which the mask is `true` to ++ /// their corresponding memory addresses in `self`. ++ /// ++ /// No memory is accessed for those lanes of `self` whose `mask` is ++ /// `false`. ++ /// ++ /// Overlapping memory addresses of `self` are written to in order ++ /// from the lest-significant to the most-significant element. 
++ /// ++ /// # Safety ++ /// ++ /// This method is unsafe because it dereferences raw pointers. The ++ /// pointers must be aligned to `mem::align_of::()`. ++ #[inline] ++ pub unsafe fn write( ++ self, mask: Simd<[M; $elem_count]>, ++ value: Simd<[T; $elem_count]>, ++ ) where ++ M: sealed::Mask, ++ [M; $elem_count]: sealed::SimdArray, ++ { ++ // FIXME: ++ // https://github.com/rust-lang-nursery/packed_simd/issues/85 ++ #[cfg(not(target_arch = "mips"))] ++ { ++ use crate::llvm::simd_scatter; ++ simd_scatter(value.0, self.0, mask.0) ++ } ++ #[cfg(target_arch = "mips")] ++ { ++ let m_ptr = ++ &mask as *const Simd<[M; $elem_count]> as *const M; ++ for i in 0..$elem_count { ++ let m = ptr::read(m_ptr.add(i)); ++ if m.test() { ++ let t_ptr = &self ++ as *const Simd<[*mut T; $elem_count]> ++ as *mut *mut T; ++ let v_ptr = &value as *const Simd<[T; $elem_count]> ++ as *const T; ++ ptr::write( ++ ptr::read(t_ptr.add(i)), ++ ptr::read(v_ptr.add(i)), ++ ); ++ } ++ } ++ } ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! { ++ mod [<$id _write>] { ++ use super::*; ++ #[test] ++ fn write() { ++ // fourty_two = [42, 42, 42, ...] ++ let fourty_two ++ = Simd::<[i32; $elem_count]>::splat(42_i32); ++ ++ // This test will write to this array ++ let mut arr = [0_i32; $elem_count]; ++ for i in 0..$elem_count { ++ arr[i] = i as i32; ++ } ++ // arr = [0, 1, 2, ...] ++ ++ let mut ptr = $id::::null(); ++ for i in 0..$elem_count { ++ ptr = ptr.replace(i, unsafe { ++ crate::mem::transmute(arr.as_ptr().add(i)) ++ }); ++ } ++ // ptr = [&arr[0], &arr[1], ...] ++ ++ // write `fourty_two` to all elements of `v` ++ { ++ let backup = arr; ++ unsafe { ++ ptr.write($mask_ty::splat(true), fourty_two) ++ }; ++ assert_eq!(arr, [42_i32; $elem_count]); ++ arr = backup; // arr = [0, 1, 2, ...] 
++ } ++ ++ // write 42 to even elements of arr: ++ { ++ // set odd elements of the mask to false ++ let mut mask = $mask_ty::splat(true); ++ for i in 0..$elem_count { ++ if i % 2 != 0 { ++ mask = mask.replace(i, false); ++ } ++ } ++ // mask = [true, false, true, false, ...] ++ ++ // expected result r = [42, 1, 42, 3, 42, 5, ...] ++ let mut r = arr; ++ for i in 0..$elem_count { ++ if i % 2 == 0 { ++ r[i] = 42; ++ } ++ } ++ ++ let backup = arr; ++ unsafe { ptr.write(mask, fourty_two) }; ++ assert_eq!(arr, r); ++ arr = backup; // arr = [0, 1, 2, 3, ...] ++ } ++ ++ // write 42 to no elements of arr ++ { ++ let backup = arr; ++ unsafe { ++ ptr.write($mask_ty::splat(false), fourty_two) ++ }; ++ assert_eq!(arr, backup); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/reductions.rs b/third_party/rust/packed_simd/src/api/reductions.rs +new file mode 100644 +index 000000000000..54d2f0cc7f08 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/reductions.rs +@@ -0,0 +1,12 @@ ++//! Reductions ++ ++#[macro_use] ++mod float_arithmetic; ++#[macro_use] ++mod integer_arithmetic; ++#[macro_use] ++mod bitwise; ++#[macro_use] ++mod mask; ++#[macro_use] ++mod min_max; +diff --git a/third_party/rust/packed_simd/src/api/reductions/bitwise.rs b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs +new file mode 100644 +index 000000000000..5bad4f474b16 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs +@@ -0,0 +1,151 @@ ++//! Implements portable horizontal bitwise vector reductions. ++#![allow(unused)] ++ ++macro_rules! impl_reduction_bitwise { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident | $ielem_ty:ident | $test_tt:tt | ++ ($convert:expr) | ++ ($true:expr, $false:expr) ++ ) => { ++ impl $id { ++ /// Lane-wise bitwise `and` of the vector elements. ++ /// ++ /// Note: if the vector has one lane, the first element of the ++ /// vector is returned. 
++ #[inline] ++ pub fn and(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_and; ++ let r: $ielem_ty = unsafe { simd_reduce_and(self.0) }; ++ $convert(r) ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on aarch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x &= self.extract(i) as $elem_ty; ++ } ++ x ++ } ++ } ++ ++ /// Lane-wise bitwise `or` of the vector elements. ++ /// ++ /// Note: if the vector has one lane, the first element of the ++ /// vector is returned. ++ #[inline] ++ pub fn or(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_or; ++ let r: $ielem_ty = unsafe { simd_reduce_or(self.0) }; ++ $convert(r) ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on aarch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x |= self.extract(i) as $elem_ty; ++ } ++ x ++ } ++ } ++ ++ /// Lane-wise bitwise `xor` of the vector elements. ++ /// ++ /// Note: if the vector has one lane, the first element of the ++ /// vector is returned. ++ #[inline] ++ pub fn xor(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_xor; ++ let r: $ielem_ty = unsafe { simd_reduce_xor(self.0) }; ++ $convert(r) ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on aarch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x ^= self.extract(i) as $elem_ty; ++ } ++ x ++ } ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _reduction_bitwise>] { ++ use super::*; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn and() { ++ let v = $id::splat($false); ++ assert_eq!(v.and(), $false); ++ let v = $id::splat($true); ++ assert_eq!(v.and(), $true); ++ let v = $id::splat($false); ++ let v = v.replace(0, $true); ++ if $id::lanes() > 1 { ++ assert_eq!(v.and(), $false); ++ } else { ++ assert_eq!(v.and(), $true); ++ } ++ let v = $id::splat($true); ++ let v = v.replace(0, $false); ++ assert_eq!(v.and(), $false); ++ ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn or() { ++ let v = $id::splat($false); ++ assert_eq!(v.or(), $false); ++ let v = $id::splat($true); ++ assert_eq!(v.or(), $true); ++ let v = $id::splat($false); ++ let v = v.replace(0, $true); ++ assert_eq!(v.or(), $true); ++ let v = $id::splat($true); ++ let v = v.replace(0, $false); ++ if $id::lanes() > 1 { ++ assert_eq!(v.or(), $true); ++ } else { ++ assert_eq!(v.or(), $false); ++ } ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn xor() { ++ let v = $id::splat($false); ++ assert_eq!(v.xor(), $false); ++ let v = $id::splat($true); ++ if $id::lanes() > 1 { ++ assert_eq!(v.xor(), $false); ++ } else { ++ assert_eq!(v.xor(), $true); ++ } ++ let v = $id::splat($false); ++ let v = v.replace(0, $true); ++ assert_eq!(v.xor(), $true); ++ let v = $id::splat($true); ++ let v = v.replace(0, $false); ++ if $id::lanes() > 1 { ++ assert_eq!(v.xor(), $true); ++ } else { ++ assert_eq!(v.xor(), $false); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs +new file mode 100644 +index 000000000000..dd722ae25fdd +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs +@@ -0,0 
+1,312 @@ ++//! Implements portable horizontal float vector arithmetic reductions. ++ ++macro_rules! impl_reduction_float_arithmetic { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Horizontal sum of the vector elements. ++ /// ++ /// The intrinsic performs a tree-reduction of the vector elements. ++ /// That is, for an 8 element vector: ++ /// ++ /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) ++ /// ++ /// If one of the vector element is `NaN` the reduction returns ++ /// `NaN`. The resulting `NaN` is not required to be equal to any ++ /// of the `NaN`s in the vector. ++ #[inline] ++ pub fn sum(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_add_ordered; ++ unsafe { simd_reduce_add_ordered(self.0, 0 as $elem_ty) } ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on AArch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x += self.extract(i) as $elem_ty; ++ } ++ x ++ } ++ } ++ ++ /// Horizontal product of the vector elements. ++ /// ++ /// The intrinsic performs a tree-reduction of the vector elements. ++ /// That is, for an 8 element vector: ++ /// ++ /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) ++ /// ++ /// If one of the vector element is `NaN` the reduction returns ++ /// `NaN`. The resulting `NaN` is not required to be equal to any ++ /// of the `NaN`s in the vector. 
++ #[inline] ++ pub fn product(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_mul_ordered; ++ unsafe { simd_reduce_mul_ordered(self.0, 1 as $elem_ty) } ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on AArch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x *= self.extract(i) as $elem_ty; ++ } ++ x ++ } ++ } ++ } ++ ++ impl crate::iter::Sum for $id { ++ #[inline] ++ fn sum>(iter: I) -> $id { ++ iter.fold($id::splat(0.), crate::ops::Add::add) ++ } ++ } ++ ++ impl crate::iter::Product for $id { ++ #[inline] ++ fn product>(iter: I) -> $id { ++ iter.fold($id::splat(1.), crate::ops::Mul::mul) ++ } ++ } ++ ++ impl<'a> crate::iter::Sum<&'a $id> for $id { ++ #[inline] ++ fn sum>(iter: I) -> $id { ++ iter.fold($id::splat(0.), |a, b| crate::ops::Add::add(a, *b)) ++ } ++ } ++ ++ impl<'a> crate::iter::Product<&'a $id> for $id { ++ #[inline] ++ fn product>(iter: I) -> $id { ++ iter.fold($id::splat(1.), |a, b| crate::ops::Mul::mul(a, *b)) ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _reduction_float_arith>] { ++ use super::*; ++ fn alternating(x: usize) -> $id { ++ let mut v = $id::splat(1 as $elem_ty); ++ for i in 0..$id::lanes() { ++ if i % x == 0 { ++ v = v.replace(i, 2 as $elem_ty); ++ } ++ } ++ v ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn sum() { ++ let v = $id::splat(0 as $elem_ty); ++ assert_eq!(v.sum(), 0 as $elem_ty); ++ let v = $id::splat(1 as $elem_ty); ++ assert_eq!(v.sum(), $id::lanes() as $elem_ty); ++ let v = alternating(2); ++ assert_eq!( ++ v.sum(), ++ ($id::lanes() / 2 + $id::lanes()) as $elem_ty ++ ); ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn product() { ++ let v = $id::splat(0 as $elem_ty); ++ assert_eq!(v.product(), 0 as $elem_ty); ++ let v = $id::splat(1 as $elem_ty); ++ assert_eq!(v.product(), 1 as $elem_ty); ++ let f = match $id::lanes() { ++ 64 => 16, ++ 32 => 8, ++ 16 => 4, ++ _ => 2, ++ }; ++ let v = alternating(f); ++ assert_eq!( ++ v.product(), ++ (2_usize.pow(($id::lanes() / f) as u32) ++ as $elem_ty) ++ ); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[allow(unreachable_code)] ++ #[allow(unused_mut)] ++ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 ++ fn sum_nan() { ++ // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/6 ++ return; ++ ++ let n0 = crate::$elem_ty::NAN; ++ let v0 = $id::splat(-3.0); ++ for i in 0..$id::lanes() { ++ let mut v = v0.replace(i, n0); ++ // If the vector contains a NaN the result is NaN: ++ assert!( ++ v.sum().is_nan(), ++ "nan at {} => {} | {:?}", ++ i, ++ v.sum(), ++ v ++ ); ++ for j in 0..i { ++ v = v.replace(j, n0); ++ assert!(v.sum().is_nan()); ++ } ++ } ++ let v = $id::splat(n0); ++ assert!(v.sum().is_nan(), "all nans | {:?}", v); ++ } ++ ++ 
#[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[allow(unreachable_code)] ++ #[allow(unused_mut)] ++ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 ++ fn product_nan() { ++ // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/6 ++ return; ++ ++ let n0 = crate::$elem_ty::NAN; ++ let v0 = $id::splat(-3.0); ++ for i in 0..$id::lanes() { ++ let mut v = v0.replace(i, n0); ++ // If the vector contains a NaN the result is NaN: ++ assert!( ++ v.product().is_nan(), ++ "nan at {} => {} | {:?}", ++ i, ++ v.product(), ++ v ++ ); ++ for j in 0..i { ++ v = v.replace(j, n0); ++ assert!(v.product().is_nan()); ++ } ++ } ++ let v = $id::splat(n0); ++ assert!(v.product().is_nan(), "all nans | {:?}", v); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[allow(unused, dead_code)] ++ fn sum_roundoff() { ++ // Performs a tree-reduction ++ fn tree_reduce_sum(a: &[$elem_ty]) -> $elem_ty { ++ assert!(!a.is_empty()); ++ if a.len() == 1 { ++ a[0] ++ } else if a.len() == 2 { ++ a[0] + a[1] ++ } else { ++ let mid = a.len() / 2; ++ let (left, right) = a.split_at(mid); ++ tree_reduce_sum(left) + tree_reduce_sum(right) ++ } ++ } ++ ++ let mut start = crate::$elem_ty::EPSILON; ++ let mut scalar_reduction = 0. as $elem_ty; ++ ++ let mut v = $id::splat(0. as $elem_ty); ++ for i in 0..$id::lanes() { ++ let c = if i % 2 == 0 { 1e3 } else { -1. }; ++ start *= 3.14 * c; ++ scalar_reduction += start; ++ v = v.replace(i, start); ++ } ++ let simd_reduction = v.sum(); ++ ++ let mut a = [0. 
as $elem_ty; $id::lanes()]; ++ v.write_to_slice_unaligned(&mut a); ++ let tree_reduction = tree_reduce_sum(&a); ++ ++ // tolerate 1 ULP difference: ++ let red_bits = simd_reduction.to_bits(); ++ let tree_bits = tree_reduction.to_bits(); ++ assert!( ++ if red_bits > tree_bits { ++ red_bits - tree_bits ++ } else { ++ tree_bits - red_bits ++ } < 2, ++ "vector: {:?} | simd_reduction: {:?} | \ ++ tree_reduction: {} | scalar_reduction: {}", ++ v, ++ simd_reduction, ++ tree_reduction, ++ scalar_reduction ++ ); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[allow(unused, dead_code)] ++ fn product_roundoff() { ++ // Performs a tree-reduction ++ fn tree_reduce_product(a: &[$elem_ty]) -> $elem_ty { ++ assert!(!a.is_empty()); ++ if a.len() == 1 { ++ a[0] ++ } else if a.len() == 2 { ++ a[0] * a[1] ++ } else { ++ let mid = a.len() / 2; ++ let (left, right) = a.split_at(mid); ++ tree_reduce_product(left) ++ * tree_reduce_product(right) ++ } ++ } ++ ++ let mut start = crate::$elem_ty::EPSILON; ++ let mut scalar_reduction = 1. as $elem_ty; ++ ++ let mut v = $id::splat(0. as $elem_ty); ++ for i in 0..$id::lanes() { ++ let c = if i % 2 == 0 { 1e3 } else { -1. }; ++ start *= 3.14 * c; ++ scalar_reduction *= start; ++ v = v.replace(i, start); ++ } ++ let simd_reduction = v.product(); ++ ++ let mut a = [0. 
as $elem_ty; $id::lanes()]; ++ v.write_to_slice_unaligned(&mut a); ++ let tree_reduction = tree_reduce_product(&a); ++ ++ // tolerate 1 ULP difference: ++ let red_bits = simd_reduction.to_bits(); ++ let tree_bits = tree_reduction.to_bits(); ++ assert!( ++ if red_bits > tree_bits { ++ red_bits - tree_bits ++ } else { ++ tree_bits - red_bits ++ } < 2, ++ "vector: {:?} | simd_reduction: {:?} | \ ++ tree_reduction: {} | scalar_reduction: {}", ++ v, ++ simd_reduction, ++ tree_reduction, ++ scalar_reduction ++ ); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs +new file mode 100644 +index 000000000000..91dffad31032 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs +@@ -0,0 +1,197 @@ ++//! Implements portable horizontal integer vector arithmetic reductions. ++ ++macro_rules! impl_reduction_integer_arithmetic { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident ++ | $test_tt:tt) => { ++ impl $id { ++ /// Horizontal wrapping sum of the vector elements. ++ /// ++ /// The intrinsic performs a tree-reduction of the vector elements. ++ /// That is, for an 8 element vector: ++ /// ++ /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) ++ /// ++ /// If an operation overflows it returns the mathematical result ++ /// modulo `2^n` where `n` is the number of times it overflows. 
++ #[inline] ++ pub fn wrapping_sum(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_add_ordered; ++ let v: $ielem_ty = unsafe { ++ simd_reduce_add_ordered(self.0, 0 as $ielem_ty) ++ }; ++ v as $elem_ty ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on AArch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x = x.wrapping_add(self.extract(i) as $elem_ty); ++ } ++ x ++ } ++ } ++ ++ /// Horizontal wrapping product of the vector elements. ++ /// ++ /// The intrinsic performs a tree-reduction of the vector elements. ++ /// That is, for an 8 element vector: ++ /// ++ /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) ++ /// ++ /// If an operation overflows it returns the mathematical result ++ /// modulo `2^n` where `n` is the number of times it overflows. ++ #[inline] ++ pub fn wrapping_product(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_mul_ordered; ++ let v: $ielem_ty = unsafe { ++ simd_reduce_mul_ordered(self.0, 1 as $ielem_ty) ++ }; ++ v as $elem_ty ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on AArch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x = x.wrapping_mul(self.extract(i) as $elem_ty); ++ } ++ x ++ } ++ } ++ } ++ ++ impl crate::iter::Sum for $id { ++ #[inline] ++ fn sum>(iter: I) -> $id { ++ iter.fold($id::splat(0), crate::ops::Add::add) ++ } ++ } ++ ++ impl crate::iter::Product for $id { ++ #[inline] ++ fn product>(iter: I) -> $id { ++ iter.fold($id::splat(1), crate::ops::Mul::mul) ++ } ++ } ++ ++ impl<'a> crate::iter::Sum<&'a $id> for $id { ++ #[inline] ++ fn sum>(iter: I) -> $id { ++ iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b)) ++ } ++ } ++ ++ impl<'a> crate::iter::Product<&'a $id> for $id { ++ #[inline] ++ 
fn product>(iter: I) -> $id { ++ iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b)) ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _reduction_int_arith>] { ++ use super::*; ++ ++ fn alternating(x: usize) -> $id { ++ let mut v = $id::splat(1 as $elem_ty); ++ for i in 0..$id::lanes() { ++ if i % x == 0 { ++ v = v.replace(i, 2 as $elem_ty); ++ } ++ } ++ v ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn wrapping_sum() { ++ let v = $id::splat(0 as $elem_ty); ++ assert_eq!(v.wrapping_sum(), 0 as $elem_ty); ++ let v = $id::splat(1 as $elem_ty); ++ assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty); ++ let v = alternating(2); ++ if $id::lanes() > 1 { ++ assert_eq!( ++ v.wrapping_sum(), ++ ($id::lanes() / 2 + $id::lanes()) as $elem_ty ++ ); ++ } else { ++ assert_eq!( ++ v.wrapping_sum(), ++ 2 as $elem_ty ++ ); ++ } ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn wrapping_sum_overflow() { ++ let start = $elem_ty::max_value() ++ - ($id::lanes() as $elem_ty / 2); ++ ++ let v = $id::splat(start as $elem_ty); ++ let vwrapping_sum = v.wrapping_sum(); ++ ++ let mut wrapping_sum = start; ++ for _ in 1..$id::lanes() { ++ wrapping_sum = wrapping_sum.wrapping_add(start); ++ } ++ assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn wrapping_product() { ++ let v = $id::splat(0 as $elem_ty); ++ assert_eq!(v.wrapping_product(), 0 as $elem_ty); ++ let v = $id::splat(1 as $elem_ty); ++ assert_eq!(v.wrapping_product(), 1 as $elem_ty); ++ let f = match $id::lanes() { ++ 64 => 16, ++ 32 => 8, ++ 16 => 4, ++ _ => 2, ++ }; ++ let v = alternating(f); ++ if $id::lanes() > 1 { ++ assert_eq!( ++ v.wrapping_product(), ++ (2_usize.pow(($id::lanes() / f) as u32) ++ as $elem_ty) ++ ); ++ } else { 
++ assert_eq!( ++ v.wrapping_product(), ++ 2 as $elem_ty ++ ); ++ } ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn wrapping_product_overflow() { ++ let start = $elem_ty::max_value() ++ - ($id::lanes() as $elem_ty / 2); ++ ++ let v = $id::splat(start as $elem_ty); ++ let vmul = v.wrapping_product(); ++ ++ let mut mul = start; ++ for _ in 1..$id::lanes() { ++ mul = mul.wrapping_mul(start); ++ } ++ assert_eq!(mul, vmul, "v = {:?}", v); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/reductions/mask.rs b/third_party/rust/packed_simd/src/api/reductions/mask.rs +new file mode 100644 +index 000000000000..0dd6a84e7e8d +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/reductions/mask.rs +@@ -0,0 +1,89 @@ ++//! Implements portable horizontal mask reductions. ++ ++macro_rules! impl_reduction_mask { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Are `all` vector lanes `true`? ++ #[inline] ++ pub fn all(self) -> bool { ++ unsafe { crate::codegen::reductions::mask::All::all(self) } ++ } ++ /// Is `any` vector lane `true`? ++ #[inline] ++ pub fn any(self) -> bool { ++ unsafe { crate::codegen::reductions::mask::Any::any(self) } ++ } ++ /// Are `all` vector lanes `false`? ++ #[inline] ++ pub fn none(self) -> bool { ++ !self.any() ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _reduction>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn all() { ++ let a = $id::splat(true); ++ assert!(a.all()); ++ let a = $id::splat(false); ++ assert!(!a.all()); ++ ++ if $id::lanes() > 1 { ++ for i in 0..$id::lanes() { ++ let mut a = $id::splat(true); ++ a = a.replace(i, false); ++ assert!(!a.all()); ++ let mut a = $id::splat(false); ++ a = a.replace(i, true); ++ assert!(!a.all()); ++ } ++ } ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn any() { ++ let a = $id::splat(true); ++ assert!(a.any()); ++ let a = $id::splat(false); ++ assert!(!a.any()); ++ ++ if $id::lanes() > 1 { ++ for i in 0..$id::lanes() { ++ let mut a = $id::splat(true); ++ a = a.replace(i, false); ++ assert!(a.any()); ++ let mut a = $id::splat(false); ++ a = a.replace(i, true); ++ assert!(a.any()); ++ } ++ } ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn none() { ++ let a = $id::splat(true); ++ assert!(!a.none()); ++ let a = $id::splat(false); ++ assert!(a.none()); ++ ++ if $id::lanes() > 1 { ++ for i in 0..$id::lanes() { ++ let mut a = $id::splat(true); ++ a = a.replace(i, false); ++ assert!(!a.none()); ++ let mut a = $id::splat(false); ++ a = a.replace(i, true); ++ assert!(!a.none()); ++ } ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/reductions/min_max.rs b/third_party/rust/packed_simd/src/api/reductions/min_max.rs +new file mode 100644 +index 000000000000..c4d3aa10f15c +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/reductions/min_max.rs +@@ -0,0 +1,377 @@ ++//! Implements portable horizontal vector min/max reductions. ++ ++macro_rules! 
impl_reduction_min_max { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident ++ | $ielem_ty:ident | $test_tt:tt) => { ++ impl $id { ++ /// Largest vector element value. ++ #[inline] ++ pub fn max_element(self) -> $elem_ty { ++ #[cfg(not(any( ++ target_arch = "aarch64", ++ target_arch = "arm", ++ target_arch = "powerpc64", ++ target_arch = "wasm32", ++ )))] ++ { ++ use crate::llvm::simd_reduce_max; ++ let v: $ielem_ty = unsafe { simd_reduce_max(self.0) }; ++ v as $elem_ty ++ } ++ #[cfg(any( ++ target_arch = "aarch64", ++ target_arch = "arm", ++ target_arch = "powerpc64", ++ target_arch = "wasm32", ++ ))] ++ { ++ // FIXME: broken on AArch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ // FIXME: broken on WASM32 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/91 ++ let mut x = self.extract(0); ++ for i in 1..$id::lanes() { ++ x = x.max(self.extract(i)); ++ } ++ x ++ } ++ } ++ ++ /// Smallest vector element value. ++ #[inline] ++ pub fn min_element(self) -> $elem_ty { ++ #[cfg(not(any( ++ target_arch = "aarch64", ++ target_arch = "arm", ++ all(target_arch = "x86", not(target_feature = "sse2")), ++ target_arch = "powerpc64", ++ target_arch = "wasm32", ++ ),))] ++ { ++ use crate::llvm::simd_reduce_min; ++ let v: $ielem_ty = unsafe { simd_reduce_min(self.0) }; ++ v as $elem_ty ++ } ++ #[cfg(any( ++ target_arch = "aarch64", ++ target_arch = "arm", ++ all(target_arch = "x86", not(target_feature = "sse2")), ++ target_arch = "powerpc64", ++ target_arch = "wasm32", ++ ))] ++ { ++ // FIXME: broken on AArch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ // FIXME: broken on i586-unknown-linux-gnu ++ // https://github.com/rust-lang-nursery/packed_simd/issues/22 ++ // FIXME: broken on WASM32 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/91 ++ let mut x = self.extract(0); ++ for i in 1..$id::lanes() { ++ x = x.min(self.extract(i)); ++ } ++ x ++ } ++ } ++ } ++ test_if! {$test_tt: ++ paste::item! 
{ ++ pub mod [<$id _reduction_min_max>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ pub fn max_element() { ++ let v = $id::splat(0 as $elem_ty); ++ assert_eq!(v.max_element(), 0 as $elem_ty); ++ if $id::lanes() > 1 { ++ let v = v.replace(1, 1 as $elem_ty); ++ assert_eq!(v.max_element(), 1 as $elem_ty); ++ } ++ let v = v.replace(0, 2 as $elem_ty); ++ assert_eq!(v.max_element(), 2 as $elem_ty); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ pub fn min_element() { ++ let v = $id::splat(0 as $elem_ty); ++ assert_eq!(v.min_element(), 0 as $elem_ty); ++ if $id::lanes() > 1 { ++ let v = v.replace(1, 1 as $elem_ty); ++ assert_eq!(v.min_element(), 0 as $elem_ty); ++ } ++ let v = $id::splat(1 as $elem_ty); ++ let v = v.replace(0, 2 as $elem_ty); ++ if $id::lanes() > 1 { ++ assert_eq!(v.min_element(), 1 as $elem_ty); ++ } else { ++ assert_eq!(v.min_element(), 2 as $elem_ty); ++ } ++ if $id::lanes() > 1 { ++ let v = $id::splat(2 as $elem_ty); ++ let v = v.replace(1, 1 as $elem_ty); ++ assert_eq!(v.min_element(), 1 as $elem_ty); ++ } ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! test_reduction_float_min_max { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _reduction_min_max_nan>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn min_element_test() { ++ let n = crate::$elem_ty::NAN; ++ ++ assert_eq!(n.min(-3.), -3.); ++ assert_eq!((-3. 
as $elem_ty).min(n), -3.); ++ ++ let v0 = $id::splat(-3.); ++ ++ let target_with_broken_last_lane_nan = !cfg!(any( ++ target_arch = "arm", target_arch = "aarch64", ++ all(target_arch = "x86", ++ not(target_feature = "sse2") ++ ), ++ target_arch = "powerpc64", ++ target_arch = "wasm32", ++ )); ++ ++ // The vector is initialized to `-3.`s: [-3, -3, -3, -3] ++ for i in 0..$id::lanes() { ++ // We replace the i-th element of the vector with ++ // `NaN`: [-3, -3, -3, NaN] ++ let mut v = v0.replace(i, n); ++ ++ // If the NaN is in the last place, the LLVM ++ // implementation of these methods is broken on some ++ // targets: ++ if i == $id::lanes() - 1 && ++ target_with_broken_last_lane_nan { ++ // FIXME: ++ // https://github.com/rust-lang-nursery/packed_simd/issues/5 ++ // ++ // If there is a NaN, the result should always ++ // the smallest element, but currently when the ++ // last element is NaN the current ++ // implementation incorrectly returns NaN. ++ // ++ // The targets mentioned above use different ++ // codegen that produces the correct result. ++ // ++ // These asserts detect if this behavior changes ++ assert!(v.min_element().is_nan(), ++ // FIXME: ^^^ should be -3. ++ "[A]: nan at {} => {} | {:?}", ++ i, v.min_element(), v); ++ ++ // If we replace all the elements in the vector ++ // up-to the `i-th` lane with `NaN`s, the result ++ // is still always `-3.` unless all elements of ++ // the vector are `NaN`s: ++ // ++ // This is also broken: ++ for j in 0..i { ++ v = v.replace(j, n); ++ assert!(v.min_element().is_nan(), ++ // FIXME: ^^^ should be -3. ++ "[B]: nan at {} => {} | {:?}", ++ i, v.min_element(), v); ++ } ++ ++ // We are done here, since we were in the last ++ // lane which is the last iteration of the loop. ++ break ++ } ++ ++ // We are not in the last lane, and there is only ++ // one `NaN` in the vector. 
++ ++ // If the vector has one lane, the result is `NaN`: ++ if $id::lanes() == 1 { ++ assert!(v.min_element().is_nan(), ++ "[C]: all nans | v={:?} | min={} | \ ++ is_nan: {}", ++ v, v.min_element(), ++ v.min_element().is_nan() ++ ); ++ ++ // And we are done, since the vector only has ++ // one lane anyways. ++ break; ++ } ++ ++ // The vector has more than one lane, since there is ++ // only one `NaN` in the vector, the result is ++ // always `-3`. ++ assert_eq!(v.min_element(), -3., ++ "[D]: nan at {} => {} | {:?}", ++ i, v.min_element(), v); ++ ++ // If we replace all the elements in the vector ++ // up-to the `i-th` lane with `NaN`s, the result is ++ // still always `-3.` unless all elements of the ++ // vector are `NaN`s: ++ for j in 0..i { ++ v = v.replace(j, n); ++ ++ if i == $id::lanes() - 1 && j == i - 1 { ++ // All elements of the vector are `NaN`s, ++ // therefore the result is NaN as well. ++ // ++ // Note: the #lanes of the vector is > 1, so ++ // "i - 1" does not overflow. ++ assert!(v.min_element().is_nan(), ++ "[E]: all nans | v={:?} | min={} | \ ++ is_nan: {}", ++ v, v.min_element(), ++ v.min_element().is_nan()); ++ } else { ++ // There are non-`NaN` elements in the ++ // vector, therefore the result is `-3.`: ++ assert_eq!(v.min_element(), -3., ++ "[F]: nan at {} => {} | {:?}", ++ i, v.min_element(), v); ++ } ++ } ++ } ++ ++ // If the vector contains all NaNs the result is NaN: ++ assert!($id::splat(n).min_element().is_nan(), ++ "all nans | v={:?} | min={} | is_nan: {}", ++ $id::splat(n), $id::splat(n).min_element(), ++ $id::splat(n).min_element().is_nan()); ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn max_element_test() { ++ let n = crate::$elem_ty::NAN; ++ ++ assert_eq!(n.max(-3.), -3.); ++ assert_eq!((-3. 
as $elem_ty).max(n), -3.); ++ ++ let v0 = $id::splat(-3.); ++ ++ let target_with_broken_last_lane_nan = !cfg!(any( ++ target_arch = "arm", target_arch = "aarch64", ++ target_arch = "powerpc64", target_arch = "wasm32", ++ )); ++ ++ // The vector is initialized to `-3.`s: [-3, -3, -3, -3] ++ for i in 0..$id::lanes() { ++ // We replace the i-th element of the vector with ++ // `NaN`: [-3, -3, -3, NaN] ++ let mut v = v0.replace(i, n); ++ ++ // If the NaN is in the last place, the LLVM ++ // implementation of these methods is broken on some ++ // targets: ++ if i == $id::lanes() - 1 && ++ target_with_broken_last_lane_nan { ++ // FIXME: ++ // https://github.com/rust-lang-nursery/packed_simd/issues/5 ++ // ++ // If there is a NaN, the result should ++ // always the largest element, but currently ++ // when the last element is NaN the current ++ // implementation incorrectly returns NaN. ++ // ++ // The targets mentioned above use different ++ // codegen that produces the correct result. ++ // ++ // These asserts detect if this behavior ++ // changes ++ assert!(v.max_element().is_nan(), ++ // FIXME: ^^^ should be -3. ++ "[A]: nan at {} => {} | {:?}", ++ i, v.max_element(), v); ++ ++ // If we replace all the elements in the vector ++ // up-to the `i-th` lane with `NaN`s, the result ++ // is still always `-3.` unless all elements of ++ // the vector are `NaN`s: ++ // ++ // This is also broken: ++ for j in 0..i { ++ v = v.replace(j, n); ++ assert!(v.max_element().is_nan(), ++ // FIXME: ^^^ should be -3. ++ "[B]: nan at {} => {} | {:?}", ++ i, v.max_element(), v); ++ } ++ ++ // We are done here, since we were in the last ++ // lane which is the last iteration of the loop. ++ break ++ } ++ ++ // We are not in the last lane, and there is only ++ // one `NaN` in the vector. 
++ ++ // If the vector has one lane, the result is `NaN`: ++ if $id::lanes() == 1 { ++ assert!(v.max_element().is_nan(), ++ "[C]: all nans | v={:?} | min={} | \ ++ is_nan: {}", ++ v, v.max_element(), ++ v.max_element().is_nan()); ++ ++ // And we are done, since the vector only has ++ // one lane anyways. ++ break; ++ } ++ ++ // The vector has more than one lane, since there is ++ // only one `NaN` in the vector, the result is ++ // always `-3`. ++ assert_eq!(v.max_element(), -3., ++ "[D]: nan at {} => {} | {:?}", ++ i, v.max_element(), v); ++ ++ // If we replace all the elements in the vector ++ // up-to the `i-th` lane with `NaN`s, the result is ++ // still always `-3.` unless all elements of the ++ // vector are `NaN`s: ++ for j in 0..i { ++ v = v.replace(j, n); ++ ++ if i == $id::lanes() - 1 && j == i - 1 { ++ // All elements of the vector are `NaN`s, ++ // therefore the result is NaN as well. ++ // ++ // Note: the #lanes of the vector is > 1, so ++ // "i - 1" does not overflow. ++ assert!(v.max_element().is_nan(), ++ "[E]: all nans | v={:?} | max={} | \ ++ is_nan: {}", ++ v, v.max_element(), ++ v.max_element().is_nan()); ++ } else { ++ // There are non-`NaN` elements in the ++ // vector, therefore the result is `-3.`: ++ assert_eq!(v.max_element(), -3., ++ "[F]: nan at {} => {} | {:?}", ++ i, v.max_element(), v); ++ } ++ } ++ } ++ ++ // If the vector contains all NaNs the result is NaN: ++ assert!($id::splat(n).max_element().is_nan(), ++ "all nans | v={:?} | max={} | is_nan: {}", ++ $id::splat(n), $id::splat(n).max_element(), ++ $id::splat(n).max_element().is_nan()); ++ } ++ } ++ } ++ } ++ } ++} +diff --git a/third_party/rust/packed_simd/src/api/select.rs b/third_party/rust/packed_simd/src/api/select.rs +new file mode 100644 +index 000000000000..24525df56c73 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/select.rs +@@ -0,0 +1,75 @@ ++//! Implements mask's `select`. ++ ++/// Implements mask select method ++macro_rules! 
impl_select { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Selects elements of `a` and `b` using mask. ++ /// ++ /// The lanes of the result for which the mask is `true` contain ++ /// the values of `a`. The remaining lanes contain the values of ++ /// `b`. ++ #[inline] ++ pub fn select(self, a: Simd, b: Simd) -> Simd ++ where ++ T: sealed::SimdArray< ++ NT = <[$elem_ty; $elem_count] as sealed::SimdArray>::NT, ++ >, ++ { ++ use crate::llvm::simd_select; ++ Simd(unsafe { simd_select(self.0, a.0, b.0) }) ++ } ++ } ++ ++ test_select!(bool, $id, $id, (false, true) | $test_tt); ++ }; ++} ++ ++macro_rules! test_select { ++ ( ++ $elem_ty:ident, ++ $mask_ty:ident, ++ $vec_ty:ident,($small:expr, $large:expr) | ++ $test_tt:tt ++ ) => { ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$vec_ty _select>] { ++ use super::*; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn select() { ++ let o = $small as $elem_ty; ++ let t = $large as $elem_ty; ++ ++ let a = $vec_ty::splat(o); ++ let b = $vec_ty::splat(t); ++ let m = a.lt(b); ++ assert_eq!(m.select(a, b), a); ++ ++ let m = b.lt(a); ++ assert_eq!(m.select(b, a), a); ++ ++ let mut c = a; ++ let mut d = b; ++ let mut m_e = $mask_ty::splat(false); ++ for i in 0..$vec_ty::lanes() { ++ if i % 2 == 0 { ++ let c_tmp = c.extract(i); ++ c = c.replace(i, d.extract(i)); ++ d = d.replace(i, c_tmp); ++ } else { ++ m_e = m_e.replace(i, true); ++ } ++ } ++ ++ let m = c.lt(d); ++ assert_eq!(m_e, m); ++ assert_eq!(m.select(c, d), a); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/shuffle.rs b/third_party/rust/packed_simd/src/api/shuffle.rs +new file mode 100644 +index 000000000000..13a7fae5fcee +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/shuffle.rs +@@ -0,0 +1,190 @@ ++//! Implements portable vector shuffles with immediate indices. 
++ ++// FIXME: comprehensive tests ++// https://github.com/rust-lang-nursery/packed_simd/issues/20 ++ ++/// Shuffles vector elements. ++/// ++/// This macro returns a new vector that contains a shuffle of the elements in ++/// one (`shuffle!(vec, [indices...])`) or two (`shuffle!(vec0, vec1, ++/// [indices...])`) input vectors. ++/// ++/// The type of `vec0` and `vec1` must be equal, and the element type of the ++/// resulting vector is the element type of the input vector. ++/// ++/// The number of `indices` must be a power-of-two in range `[2, 64]`, since ++/// currently, the largest vector supported by the library has 64 lanes. The ++/// length of the resulting vector equals the number of indices provided. ++/// ++/// The indices must be in range `[0, M * N)` where `M` is the number of input ++/// vectors (`1` or `2`) and `N` is the number of lanes of the input vectors. ++/// The indices `i` in range `[0, N)` refer to the `i`-th element of `vec0`, ++/// while the indices in range `[N, 2*N)` refer to the `i - N`-th element of ++/// `vec1`. ++/// ++/// # Examples ++/// ++/// Shuffling elements of two vectors: ++/// ++/// ``` ++/// # #[macro_use] ++/// # extern crate packed_simd; ++/// # use packed_simd::*; ++/// # fn main() { ++/// // Shuffle allows reordering the elements: ++/// let x = i32x4::new(1, 2, 3, 4); ++/// let y = i32x4::new(5, 6, 7, 8); ++/// let r = shuffle!(x, y, [4, 0, 5, 1]); ++/// assert_eq!(r, i32x4::new(5, 1, 6, 2)); ++/// ++/// // The resulting vector can also be smaller than the input: ++/// let r = shuffle!(x, y, [1, 6]); ++/// assert_eq!(r, i32x2::new(2, 7)); ++/// ++/// // Or larger: ++/// let r = shuffle!(x, y, [1, 3, 4, 2, 1, 7, 2, 2]); ++/// assert_eq!(r, i32x8::new(2, 4, 5, 3, 2, 8, 3, 3)); ++/// // At most 2 * the number of lanes in the input vector.
++/// # } ++/// ``` ++/// ++/// Shuffling elements of one vector: ++/// ++/// ``` ++/// # #[macro_use] ++/// # extern crate packed_simd; ++/// # use packed_simd::*; ++/// # fn main() { ++/// // Shuffle allows reordering the elements of a vector: ++/// let x = i32x4::new(1, 2, 3, 4); ++/// let r = shuffle!(x, [2, 1, 3, 0]); ++/// assert_eq!(r, i32x4::new(3, 2, 4, 1)); ++/// ++/// // The resulting vector can be smaller than the input: ++/// let r = shuffle!(x, [1, 3]); ++/// assert_eq!(r, i32x2::new(2, 4)); ++/// ++/// // Equal: ++/// let r = shuffle!(x, [1, 3, 2, 0]); ++/// assert_eq!(r, i32x4::new(2, 4, 3, 1)); ++/// ++/// // Or larger: ++/// let r = shuffle!(x, [1, 3, 2, 2, 1, 3, 2, 2]); ++/// assert_eq!(r, i32x8::new(2, 4, 3, 3, 2, 4, 3, 3)); ++/// // At most 2 * the number of lanes in the input vector. ++/// # } ++/// ``` ++#[macro_export] ++macro_rules! shuffle { ++ ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr]) => {{ ++ #[allow(unused_unsafe)] ++ unsafe { ++ $crate::Simd($crate::__shuffle_vector2( ++ $vec0.0, ++ $vec1.0, ++ [$l0, $l1], ++ )) ++ } ++ }}; ++ ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr]) => {{ ++ #[allow(unused_unsafe)] ++ unsafe { ++ $crate::Simd($crate::__shuffle_vector4( ++ $vec0.0, ++ $vec1.0, ++ [$l0, $l1, $l2, $l3], ++ )) ++ } ++ }}; ++ ($vec0:expr, $vec1:expr, ++ [$l0:expr, $l1:expr, $l2:expr, $l3:expr, ++ $l4:expr, $l5:expr, $l6:expr, $l7:expr]) => {{ ++ #[allow(unused_unsafe)] ++ unsafe { ++ $crate::Simd($crate::__shuffle_vector8( ++ $vec0.0, ++ $vec1.0, ++ [$l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7], ++ )) ++ } ++ }}; ++ ($vec0:expr, $vec1:expr, ++ [$l0:expr, $l1:expr, $l2:expr, $l3:expr, ++ $l4:expr, $l5:expr, $l6:expr, $l7:expr, ++ $l8:expr, $l9:expr, $l10:expr, $l11:expr, ++ $l12:expr, $l13:expr, $l14:expr, $l15:expr]) => {{ ++ #[allow(unused_unsafe)] ++ unsafe { ++ $crate::Simd($crate::__shuffle_vector16( ++ $vec0.0, ++ $vec1.0, ++ [ ++ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, ++ $l11, $l12, $l13, 
$l14, $l15, ++ ], ++ )) ++ } ++ }}; ++ ($vec0:expr, $vec1:expr, ++ [$l0:expr, $l1:expr, $l2:expr, $l3:expr, ++ $l4:expr, $l5:expr, $l6:expr, $l7:expr, ++ $l8:expr, $l9:expr, $l10:expr, $l11:expr, ++ $l12:expr, $l13:expr, $l14:expr, $l15:expr, ++ $l16:expr, $l17:expr, $l18:expr, $l19:expr, ++ $l20:expr, $l21:expr, $l22:expr, $l23:expr, ++ $l24:expr, $l25:expr, $l26:expr, $l27:expr, ++ $l28:expr, $l29:expr, $l30:expr, $l31:expr]) => {{ ++ #[allow(unused_unsafe)] ++ unsafe { ++ $crate::Simd($crate::__shuffle_vector32( ++ $vec0.0, ++ $vec1.0, ++ [ ++ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, ++ $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19, ++ $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28, ++ $l29, $l30, $l31, ++ ], ++ )) ++ } ++ }}; ++ ($vec0:expr, $vec1:expr, ++ [$l0:expr, $l1:expr, $l2:expr, $l3:expr, ++ $l4:expr, $l5:expr, $l6:expr, $l7:expr, ++ $l8:expr, $l9:expr, $l10:expr, $l11:expr, ++ $l12:expr, $l13:expr, $l14:expr, $l15:expr, ++ $l16:expr, $l17:expr, $l18:expr, $l19:expr, ++ $l20:expr, $l21:expr, $l22:expr, $l23:expr, ++ $l24:expr, $l25:expr, $l26:expr, $l27:expr, ++ $l28:expr, $l29:expr, $l30:expr, $l31:expr, ++ $l32:expr, $l33:expr, $l34:expr, $l35:expr, ++ $l36:expr, $l37:expr, $l38:expr, $l39:expr, ++ $l40:expr, $l41:expr, $l42:expr, $l43:expr, ++ $l44:expr, $l45:expr, $l46:expr, $l47:expr, ++ $l48:expr, $l49:expr, $l50:expr, $l51:expr, ++ $l52:expr, $l53:expr, $l54:expr, $l55:expr, ++ $l56:expr, $l57:expr, $l58:expr, $l59:expr, ++ $l60:expr, $l61:expr, $l62:expr, $l63:expr]) => {{ ++ #[allow(unused_unsafe)] ++ unsafe { ++ $crate::Simd($crate::__shuffle_vector64( ++ $vec0.0, ++ $vec1.0, ++ [ ++ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, ++ $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19, ++ $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28, ++ $l29, $l30, $l31, $l32, $l33, $l34, $l35, $l36, $l37, ++ $l38, $l39, $l40, $l41, $l42, $l43, $l44, $l45, $l46, ++ $l47, $l48, $l49, $l50, $l51, $l52, $l53, $l54, 
$l55, ++ $l56, $l57, $l58, $l59, $l60, $l61, $l62, $l63, ++ ], ++ )) ++ } ++ }}; ++ ($vec:expr, [$($l:expr),*]) => { ++ match $vec { ++ v => shuffle!(v, v, [$($l),*]) ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs +new file mode 100644 +index 000000000000..64536be6cba1 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs +@@ -0,0 +1,159 @@ ++//! Shuffle vector elements according to a dynamic vector of indices. ++ ++macro_rules! impl_shuffle1_dyn { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Shuffle vector elements according to `indices`. ++ #[inline] ++ pub fn shuffle1_dyn(self, indices: I) -> Self ++ where ++ Self: codegen::shuffle1_dyn::Shuffle1Dyn, ++ { ++ codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices) ++ } ++ } ++ }; ++} ++ ++macro_rules! test_shuffle1_dyn { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ test_if! { ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _shuffle1_dyn>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn shuffle1_dyn() { ++ let increasing = { ++ let mut v = $id::splat(0 as $elem_ty); ++ for i in 0..$id::lanes() { ++ v = v.replace(i, i as $elem_ty); ++ } ++ v ++ }; ++ let decreasing = { ++ let mut v = $id::splat(0 as $elem_ty); ++ for i in 0..$id::lanes() { ++ v = v.replace( ++ i, ++ ($id::lanes() - 1 - i) as $elem_ty ++ ); ++ } ++ v ++ }; ++ ++ type Indices = < ++ $id as codegen::shuffle1_dyn::Shuffle1Dyn ++ >::Indices; ++ let increasing_ids: Indices = increasing.cast(); ++ let decreasing_ids: Indices = decreasing.cast(); ++ ++ assert_eq!( ++ increasing.shuffle1_dyn(increasing_ids), ++ increasing, ++ "(i,i)=>i" ++ ); ++ assert_eq!( ++ decreasing.shuffle1_dyn(increasing_ids), ++ decreasing, ++ "(d,i)=>d" ++ ); ++ assert_eq!( ++ increasing.shuffle1_dyn(decreasing_ids), ++ decreasing, ++ "(i,d)=>d" ++ ); ++ assert_eq!( ++ decreasing.shuffle1_dyn(decreasing_ids), ++ increasing, ++ "(d,d)=>i" ++ ); ++ ++ for i in 0..$id::lanes() { ++ let v_ids: Indices ++ = $id::splat(i as $elem_ty).cast(); ++ assert_eq!(increasing.shuffle1_dyn(v_ids), ++ $id::splat(increasing.extract(i)) ++ ); ++ assert_eq!(decreasing.shuffle1_dyn(v_ids), ++ $id::splat(decreasing.extract(i)) ++ ); ++ assert_eq!( ++ $id::splat(i as $elem_ty) ++ .shuffle1_dyn(increasing_ids), ++ $id::splat(i as $elem_ty) ++ ); ++ assert_eq!( ++ $id::splat(i as $elem_ty) ++ .shuffle1_dyn(decreasing_ids), ++ $id::splat(i as $elem_ty) ++ ); ++ } ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! test_shuffle1_dyn_mask { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _shuffle1_dyn>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn shuffle1_dyn() { ++ // alternating = [true, false, true, false, ...] 
++ let mut alternating = $id::splat(false); ++ for i in 0..$id::lanes() { ++ if i % 2 == 0 { ++ alternating = alternating.replace(i, true); ++ } ++ } ++ ++ type Indices = < ++ $id as codegen::shuffle1_dyn::Shuffle1Dyn ++ >::Indices; ++ // even = [0, 0, 2, 2, 4, 4, ..] ++ let even = { ++ let mut v = Indices::splat(0); ++ for i in 0..$id::lanes() { ++ if i % 2 == 0 { ++ v = v.replace(i, (i as u8).into()); ++ } else { ++ v = v.replace(i, (i as u8 - 1).into()); ++ } ++ } ++ v ++ }; ++ // odd = [1, 1, 3, 3, 5, 5, ...] ++ let odd = { ++ let mut v = Indices::splat(0); ++ for i in 0..$id::lanes() { ++ if i % 2 != 0 { ++ v = v.replace(i, (i as u8).into()); ++ } else { ++ v = v.replace(i, (i as u8 + 1).into()); ++ } ++ } ++ v ++ }; ++ ++ assert_eq!( ++ alternating.shuffle1_dyn(even), ++ $id::splat(true) ++ ); ++ if $id::lanes() > 1 { ++ assert_eq!( ++ alternating.shuffle1_dyn(odd), ++ $id::splat(false) ++ ); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/slice.rs b/third_party/rust/packed_simd/src/api/slice.rs +new file mode 100644 +index 000000000000..526b848b5c06 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/slice.rs +@@ -0,0 +1,7 @@ ++//! Slice from/to methods ++ ++#[macro_use] ++mod from_slice; ++ ++#[macro_use] ++mod write_to_slice; +diff --git a/third_party/rust/packed_simd/src/api/slice/from_slice.rs b/third_party/rust/packed_simd/src/api/slice/from_slice.rs +new file mode 100644 +index 000000000000..109cd1f10b01 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/slice/from_slice.rs +@@ -0,0 +1,216 @@ ++//! Implements methods to read a vector type from a slice. ++ ++macro_rules! impl_slice_from_slice { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned ++ /// to an `align_of::()` boundary. 
++ #[inline] ++ pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ let target_ptr = slice.get_unchecked(0) as *const $elem_ty; ++ assert_eq!( ++ target_ptr ++ .align_offset(crate::mem::align_of::()), ++ 0 ++ ); ++ Self::from_slice_aligned_unchecked(slice) ++ } ++ } ++ ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()`. ++ #[inline] ++ pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ Self::from_slice_unaligned_unchecked(slice) ++ } ++ } ++ ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned ++ /// to an `align_of::()` boundary, the behavior is undefined. ++ #[inline] ++ pub unsafe fn from_slice_aligned_unchecked( ++ slice: &[$elem_ty], ++ ) -> Self { ++ debug_assert!(slice.len() >= $elem_count); ++ let target_ptr = slice.get_unchecked(0) as *const $elem_ty; ++ debug_assert_eq!( ++ target_ptr.align_offset(crate::mem::align_of::()), ++ 0 ++ ); ++ ++ #[allow(clippy::cast_ptr_alignment)] ++ *(target_ptr as *const Self) ++ } ++ ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` the behavior is undefined. ++ #[inline] ++ pub unsafe fn from_slice_unaligned_unchecked( ++ slice: &[$elem_ty], ++ ) -> Self { ++ use crate::mem::size_of; ++ debug_assert!(slice.len() >= $elem_count); ++ let target_ptr = ++ slice.get_unchecked(0) as *const $elem_ty as *const u8; ++ let mut x = Self::splat(0 as $elem_ty); ++ let self_ptr = &mut x as *mut Self as *mut u8; ++ crate::ptr::copy_nonoverlapping( ++ target_ptr, ++ self_ptr, ++ size_of::(), ++ ); ++ x ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _slice_from_slice>] { ++ use super::*; ++ use crate::iter::Iterator; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn from_slice_unaligned() { ++ let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; ++ unaligned[0] = 0 as $elem_ty; ++ let vec = $id::from_slice_unaligned(&unaligned[1..]); ++ for (index, &b) in unaligned.iter().enumerate() { ++ if index == 0 { ++ assert_eq!(b, 0 as $elem_ty); ++ } else { ++ assert_eq!(b, 42 as $elem_ty); ++ assert_eq!(b, vec.extract(index - 1)); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn from_slice_unaligned_fail() { ++ let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; ++ unaligned[0] = 0 as $elem_ty; ++ // the slice is not large enough => panic ++ let _vec = $id::from_slice_unaligned(&unaligned[2..]); ++ } ++ ++ union A { ++ data: [$elem_ty; 2 * $id::lanes()], ++ _vec: $id, ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn from_slice_aligned() { ++ let mut aligned = A { ++ data: [0 as $elem_ty; 2 * $id::lanes()], ++ }; ++ for i in $id::lanes()..(2 * $id::lanes()) { ++ unsafe { ++ aligned.data[i] = 42 as $elem_ty; ++ } ++ } ++ ++ let vec = unsafe { ++ $id::from_slice_aligned( ++ &aligned.data[$id::lanes()..] 
++ ) ++ }; ++ for (index, &b) in ++ unsafe { aligned.data.iter().enumerate() } { ++ if index < $id::lanes() { ++ assert_eq!(b, 0 as $elem_ty); ++ } else { ++ assert_eq!(b, 42 as $elem_ty); ++ assert_eq!( ++ b, vec.extract(index - $id::lanes()) ++ ); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn from_slice_aligned_fail_lanes() { ++ let aligned = A { ++ data: [0 as $elem_ty; 2 * $id::lanes()], ++ }; ++ let _vec = unsafe { ++ $id::from_slice_aligned( ++ &aligned.data[2 * $id::lanes()..] ++ ) ++ }; ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn from_slice_aligned_fail_align() { ++ unsafe { ++ let aligned = A { ++ data: [0 as $elem_ty; 2 * $id::lanes()], ++ }; ++ ++ // get a pointer to the front of data ++ let ptr: *const $elem_ty = aligned.data.as_ptr() ++ as *const $elem_ty; ++ // offset pointer by one element ++ let ptr = ptr.wrapping_add(1); ++ ++ if ptr.align_offset( ++ crate::mem::align_of::<$id>() ++ ) == 0 { ++ // the pointer is properly aligned, so ++ // from_slice_aligned won't fail here (e.g. this ++ // can happen for i128x1). 
So we panic to make ++ // the "should_fail" test pass: ++ panic!("ok"); ++ } ++ ++ // create a slice - this is safe, because the ++ // elements of the slice exist, are properly ++ // initialized, and properly aligned: ++ let s: &[$elem_ty] = slice::from_raw_parts( ++ ptr, $id::lanes() ++ ); ++ // this should always panic because the slice ++ // alignment does not match the alignment ++ // requirements for the vector type: ++ let _vec = $id::from_slice_aligned(s); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs +new file mode 100644 +index 000000000000..fcb288da70fc +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs +@@ -0,0 +1,211 @@ ++//! Implements methods to write a vector type to a slice. ++ ++macro_rules! impl_slice_write_to_slice { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not ++ /// aligned to an `align_of::()` boundary. ++ #[inline] ++ pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ let target_ptr = ++ slice.get_unchecked_mut(0) as *mut $elem_ty; ++ assert_eq!( ++ target_ptr ++ .align_offset(crate::mem::align_of::()), ++ 0 ++ ); ++ self.write_to_slice_aligned_unchecked(slice); ++ } ++ } ++ ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()`. ++ #[inline] ++ pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ self.write_to_slice_unaligned_unchecked(slice); ++ } ++ } ++ ++ /// Writes the values of the vector to the `slice`. 
++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not ++ /// aligned to an `align_of::()` boundary, the behavior is ++ /// undefined. ++ #[inline] ++ pub unsafe fn write_to_slice_aligned_unchecked( ++ self, slice: &mut [$elem_ty], ++ ) { ++ debug_assert!(slice.len() >= $elem_count); ++ let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty; ++ debug_assert_eq!( ++ target_ptr.align_offset(crate::mem::align_of::()), ++ 0 ++ ); ++ ++ #[allow(clippy::cast_ptr_alignment)] ++ #[allow(clippy::cast_ptr_alignment)] ++ #[allow(clippy::cast_ptr_alignment)] ++ #[allow(clippy::cast_ptr_alignment)] ++ *(target_ptr as *mut Self) = self; ++ } ++ ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` the behavior is undefined. ++ #[inline] ++ pub unsafe fn write_to_slice_unaligned_unchecked( ++ self, slice: &mut [$elem_ty], ++ ) { ++ debug_assert!(slice.len() >= $elem_count); ++ let target_ptr = ++ slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; ++ let self_ptr = &self as *const Self as *const u8; ++ crate::ptr::copy_nonoverlapping( ++ self_ptr, ++ target_ptr, ++ crate::mem::size_of::(), ++ ); ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _slice_write_to_slice>] { ++ use super::*; ++ use crate::iter::Iterator; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn write_to_slice_unaligned() { ++ let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; ++ let vec = $id::splat(42 as $elem_ty); ++ vec.write_to_slice_unaligned(&mut unaligned[1..]); ++ for (index, &b) in unaligned.iter().enumerate() { ++ if index == 0 { ++ assert_eq!(b, 0 as $elem_ty); ++ } else { ++ assert_eq!(b, 42 as $elem_ty); ++ assert_eq!(b, vec.extract(index - 1)); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn write_to_slice_unaligned_fail() { ++ let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; ++ let vec = $id::splat(42 as $elem_ty); ++ vec.write_to_slice_unaligned(&mut unaligned[2..]); ++ } ++ ++ union A { ++ data: [$elem_ty; 2 * $id::lanes()], ++ _vec: $id, ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn write_to_slice_aligned() { ++ let mut aligned = A { ++ data: [0 as $elem_ty; 2 * $id::lanes()], ++ }; ++ let vec = $id::splat(42 as $elem_ty); ++ unsafe { ++ vec.write_to_slice_aligned( ++ &mut aligned.data[$id::lanes()..] 
++ ); ++ for (idx, &b) in aligned.data.iter().enumerate() { ++ if idx < $id::lanes() { ++ assert_eq!(b, 0 as $elem_ty); ++ } else { ++ assert_eq!(b, 42 as $elem_ty); ++ assert_eq!( ++ b, vec.extract(idx - $id::lanes()) ++ ); ++ } ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn write_to_slice_aligned_fail_lanes() { ++ let mut aligned = A { ++ data: [0 as $elem_ty; 2 * $id::lanes()], ++ }; ++ let vec = $id::splat(42 as $elem_ty); ++ unsafe { ++ vec.write_to_slice_aligned( ++ &mut aligned.data[2 * $id::lanes()..] ++ ) ++ }; ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn write_to_slice_aligned_fail_align() { ++ unsafe { ++ let mut aligned = A { ++ data: [0 as $elem_ty; 2 * $id::lanes()], ++ }; ++ ++ // get a pointer to the front of data ++ let ptr: *mut $elem_ty ++ = aligned.data.as_mut_ptr() as *mut $elem_ty; ++ // offset pointer by one element ++ let ptr = ptr.wrapping_add(1); ++ ++ if ptr.align_offset(crate::mem::align_of::<$id>()) ++ == 0 { ++ // the pointer is properly aligned, so ++ // write_to_slice_aligned won't fail here (e.g. ++ // this can happen for i128x1). 
So we panic to ++ // make the "should_fail" test pass: ++ panic!("ok"); ++ } ++ ++ // create a slice - this is safe, because the ++ // elements of the slice exist, are properly ++ // initialized, and properly aligned: ++ let s: &mut [$elem_ty] ++ = slice::from_raw_parts_mut(ptr, $id::lanes()); ++ // this should always panic because the slice ++ // alignment does not match the alignment ++ // requirements for the vector type: ++ let vec = $id::splat(42 as $elem_ty); ++ vec.write_to_slice_aligned(s); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/swap_bytes.rs b/third_party/rust/packed_simd/src/api/swap_bytes.rs +new file mode 100644 +index 000000000000..53bba25bd311 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/swap_bytes.rs +@@ -0,0 +1,192 @@ ++//! Horizontal swap bytes ++ ++macro_rules! impl_swap_bytes { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Reverses the byte order of the vector. ++ #[inline] ++ pub fn swap_bytes(self) -> Self { ++ super::codegen::swap_bytes::SwapBytes::swap_bytes(self) ++ } ++ ++ /// Converts self to little endian from the target's endianness. ++ /// ++ /// On little endian this is a no-op. On big endian the bytes are ++ /// swapped. ++ #[inline] ++ pub fn to_le(self) -> Self { ++ #[cfg(target_endian = "little")] ++ { ++ self ++ } ++ #[cfg(not(target_endian = "little"))] ++ { ++ self.swap_bytes() ++ } ++ } ++ ++ /// Converts self to big endian from the target's endianness. ++ /// ++ /// On big endian this is a no-op. On little endian the bytes are ++ /// swapped. ++ #[inline] ++ pub fn to_be(self) -> Self { ++ #[cfg(target_endian = "big")] ++ { ++ self ++ } ++ #[cfg(not(target_endian = "big"))] ++ { ++ self.swap_bytes() ++ } ++ } ++ ++ /// Converts a vector from little endian to the target's endianness. ++ /// ++ /// On little endian this is a no-op. On big endian the bytes are ++ /// swapped. 
++ #[inline] ++ pub fn from_le(x: Self) -> Self { ++ #[cfg(target_endian = "little")] ++ { ++ x ++ } ++ #[cfg(not(target_endian = "little"))] ++ { ++ x.swap_bytes() ++ } ++ } ++ ++ /// Converts a vector from big endian to the target's endianness. ++ /// ++ /// On big endian this is a no-op. On little endian the bytes are ++ /// swapped. ++ #[inline] ++ pub fn from_be(x: Self) -> Self { ++ #[cfg(target_endian = "big")] ++ { ++ x ++ } ++ #[cfg(not(target_endian = "big"))] ++ { ++ x.swap_bytes() ++ } ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item_with_macros! { ++ pub mod [<$id _swap_bytes>] { ++ use super::*; ++ ++ const BYTES: [u8; 64] = [ ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ ]; ++ ++ macro_rules! swap { ++ ($func: ident) => {{ ++ // catch possible future >512 vectors ++ assert!(mem::size_of::<$id>() <= 64); ++ ++ let mut actual = BYTES; ++ let elems: &mut [$elem_ty] = unsafe { ++ slice::from_raw_parts_mut( ++ actual.as_mut_ptr() as *mut $elem_ty, ++ $id::lanes(), ++ ) ++ }; ++ ++ let vec = $id::from_slice_unaligned(elems); ++ $id::$func(vec).write_to_slice_unaligned(elems); ++ ++ actual ++ }}; ++ } ++ ++ macro_rules! test_swap { ++ ($func: ident) => {{ ++ let actual = swap!($func); ++ let expected = ++ BYTES.iter().rev() ++ .skip(64 - crate::mem::size_of::<$id>()); ++ assert!(actual.iter().zip(expected) ++ .all(|(x, y)| x == y)); ++ }}; ++ } ++ ++ macro_rules! 
test_no_swap { ++ ($func: ident) => {{ ++ let actual = swap!($func); ++ let expected = BYTES.iter() ++ .take(mem::size_of::<$id>()); ++ ++ assert!(actual.iter().zip(expected) ++ .all(|(x, y)| x == y)); ++ }}; ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn swap_bytes() { ++ test_swap!(swap_bytes); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn to_le() { ++ #[cfg(target_endian = "little")] ++ { ++ test_no_swap!(to_le); ++ } ++ #[cfg(not(target_endian = "little"))] ++ { ++ test_swap!(to_le); ++ } ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn to_be() { ++ #[cfg(target_endian = "big")] ++ { ++ test_no_swap!(to_be); ++ } ++ #[cfg(not(target_endian = "big"))] ++ { ++ test_swap!(to_be); ++ } ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn from_le() { ++ #[cfg(target_endian = "little")] ++ { ++ test_no_swap!(from_le); ++ } ++ #[cfg(not(target_endian = "little"))] ++ { ++ test_swap!(from_le); ++ } ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn from_be() { ++ #[cfg(target_endian = "big")] ++ { ++ test_no_swap!(from_be); ++ } ++ #[cfg(not(target_endian = "big"))] ++ { ++ test_swap!(from_be); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen.rs b/third_party/rust/packed_simd/src/codegen.rs +new file mode 100644 +index 000000000000..b7ccd838603f +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen.rs +@@ -0,0 +1,59 @@ ++//! Code-generation utilities ++ ++crate mod bit_manip; ++crate mod llvm; ++crate mod math; ++crate mod reductions; ++crate mod shuffle; ++crate mod shuffle1_dyn; ++crate mod swap_bytes; ++ ++macro_rules! 
impl_simd_array { ++ ([$elem_ty:ident; $elem_count:expr]: ++ $tuple_id:ident | $($elem_tys:ident),*) => { ++ #[derive(Copy, Clone)] ++ #[repr(simd)] ++ pub struct $tuple_id($(crate $elem_tys),*); ++ //^^^^^^^ leaked through SimdArray ++ ++ impl crate::sealed::SimdArray for [$elem_ty; $elem_count] { ++ type Tuple = $tuple_id; ++ type T = $elem_ty; ++ const N: usize = $elem_count; ++ type NT = [u32; $elem_count]; ++ } ++ ++ impl crate::sealed::Simd for $tuple_id { ++ type Element = $elem_ty; ++ const LANES: usize = $elem_count; ++ type LanesType = [u32; $elem_count]; ++ } ++ ++ } ++} ++ ++crate mod pointer_sized_int; ++ ++crate mod v16; ++crate use self::v16::*; ++ ++crate mod v32; ++crate use self::v32::*; ++ ++crate mod v64; ++crate use self::v64::*; ++ ++crate mod v128; ++crate use self::v128::*; ++ ++crate mod v256; ++crate use self::v256::*; ++ ++crate mod v512; ++crate use self::v512::*; ++ ++crate mod vSize; ++crate use self::vSize::*; ++ ++crate mod vPtr; ++crate use self::vPtr::*; +diff --git a/third_party/rust/packed_simd/src/codegen/bit_manip.rs b/third_party/rust/packed_simd/src/codegen/bit_manip.rs +new file mode 100644 +index 000000000000..947266f5bce8 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/bit_manip.rs +@@ -0,0 +1,354 @@ ++//! LLVM bit manipulation intrinsics. 
++#![rustfmt::skip] ++ ++use crate::*; ++ ++#[allow(improper_ctypes, dead_code)] ++extern "C" { ++ #[link_name = "llvm.ctlz.v2i8"] ++ fn ctlz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2; ++ #[link_name = "llvm.ctlz.v4i8"] ++ fn ctlz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4; ++ #[link_name = "llvm.ctlz.v8i8"] ++ fn ctlz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8; ++ #[link_name = "llvm.ctlz.v16i8"] ++ fn ctlz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16; ++ #[link_name = "llvm.ctlz.v32i8"] ++ fn ctlz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32; ++ #[link_name = "llvm.ctlz.v64i8"] ++ fn ctlz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64; ++ ++ #[link_name = "llvm.ctlz.v2i16"] ++ fn ctlz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2; ++ #[link_name = "llvm.ctlz.v4i16"] ++ fn ctlz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4; ++ #[link_name = "llvm.ctlz.v8i16"] ++ fn ctlz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8; ++ #[link_name = "llvm.ctlz.v16i16"] ++ fn ctlz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16; ++ #[link_name = "llvm.ctlz.v32i16"] ++ fn ctlz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32; ++ ++ #[link_name = "llvm.ctlz.v2i32"] ++ fn ctlz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2; ++ #[link_name = "llvm.ctlz.v4i32"] ++ fn ctlz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4; ++ #[link_name = "llvm.ctlz.v8i32"] ++ fn ctlz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8; ++ #[link_name = "llvm.ctlz.v16i32"] ++ fn ctlz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16; ++ ++ #[link_name = "llvm.ctlz.v2i64"] ++ fn ctlz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2; ++ #[link_name = "llvm.ctlz.v4i64"] ++ fn ctlz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4; ++ #[link_name = "llvm.ctlz.v8i64"] ++ fn ctlz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8; ++ ++ #[link_name = "llvm.ctlz.v1i128"] ++ fn ctlz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1; ++ #[link_name = "llvm.ctlz.v2i128"] ++ fn ctlz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2; ++ 
#[link_name = "llvm.ctlz.v4i128"] ++ fn ctlz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4; ++ ++ #[link_name = "llvm.cttz.v2i8"] ++ fn cttz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2; ++ #[link_name = "llvm.cttz.v4i8"] ++ fn cttz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4; ++ #[link_name = "llvm.cttz.v8i8"] ++ fn cttz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8; ++ #[link_name = "llvm.cttz.v16i8"] ++ fn cttz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16; ++ #[link_name = "llvm.cttz.v32i8"] ++ fn cttz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32; ++ #[link_name = "llvm.cttz.v64i8"] ++ fn cttz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64; ++ ++ #[link_name = "llvm.cttz.v2i16"] ++ fn cttz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2; ++ #[link_name = "llvm.cttz.v4i16"] ++ fn cttz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4; ++ #[link_name = "llvm.cttz.v8i16"] ++ fn cttz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8; ++ #[link_name = "llvm.cttz.v16i16"] ++ fn cttz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16; ++ #[link_name = "llvm.cttz.v32i16"] ++ fn cttz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32; ++ ++ #[link_name = "llvm.cttz.v2i32"] ++ fn cttz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2; ++ #[link_name = "llvm.cttz.v4i32"] ++ fn cttz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4; ++ #[link_name = "llvm.cttz.v8i32"] ++ fn cttz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8; ++ #[link_name = "llvm.cttz.v16i32"] ++ fn cttz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16; ++ ++ #[link_name = "llvm.cttz.v2i64"] ++ fn cttz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2; ++ #[link_name = "llvm.cttz.v4i64"] ++ fn cttz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4; ++ #[link_name = "llvm.cttz.v8i64"] ++ fn cttz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8; ++ ++ #[link_name = "llvm.cttz.v1i128"] ++ fn cttz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1; ++ #[link_name = "llvm.cttz.v2i128"] ++ fn cttz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2; 
++ #[link_name = "llvm.cttz.v4i128"] ++ fn cttz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4; ++ ++ #[link_name = "llvm.ctpop.v2i8"] ++ fn ctpop_u8x2(x: u8x2) -> u8x2; ++ #[link_name = "llvm.ctpop.v4i8"] ++ fn ctpop_u8x4(x: u8x4) -> u8x4; ++ #[link_name = "llvm.ctpop.v8i8"] ++ fn ctpop_u8x8(x: u8x8) -> u8x8; ++ #[link_name = "llvm.ctpop.v16i8"] ++ fn ctpop_u8x16(x: u8x16) -> u8x16; ++ #[link_name = "llvm.ctpop.v32i8"] ++ fn ctpop_u8x32(x: u8x32) -> u8x32; ++ #[link_name = "llvm.ctpop.v64i8"] ++ fn ctpop_u8x64(x: u8x64) -> u8x64; ++ ++ #[link_name = "llvm.ctpop.v2i16"] ++ fn ctpop_u16x2(x: u16x2) -> u16x2; ++ #[link_name = "llvm.ctpop.v4i16"] ++ fn ctpop_u16x4(x: u16x4) -> u16x4; ++ #[link_name = "llvm.ctpop.v8i16"] ++ fn ctpop_u16x8(x: u16x8) -> u16x8; ++ #[link_name = "llvm.ctpop.v16i16"] ++ fn ctpop_u16x16(x: u16x16) -> u16x16; ++ #[link_name = "llvm.ctpop.v32i16"] ++ fn ctpop_u16x32(x: u16x32) -> u16x32; ++ ++ #[link_name = "llvm.ctpop.v2i32"] ++ fn ctpop_u32x2(x: u32x2) -> u32x2; ++ #[link_name = "llvm.ctpop.v4i32"] ++ fn ctpop_u32x4(x: u32x4) -> u32x4; ++ #[link_name = "llvm.ctpop.v8i32"] ++ fn ctpop_u32x8(x: u32x8) -> u32x8; ++ #[link_name = "llvm.ctpop.v16i32"] ++ fn ctpop_u32x16(x: u32x16) -> u32x16; ++ ++ #[link_name = "llvm.ctpop.v2i64"] ++ fn ctpop_u64x2(x: u64x2) -> u64x2; ++ #[link_name = "llvm.ctpop.v4i64"] ++ fn ctpop_u64x4(x: u64x4) -> u64x4; ++ #[link_name = "llvm.ctpop.v8i64"] ++ fn ctpop_u64x8(x: u64x8) -> u64x8; ++ ++ #[link_name = "llvm.ctpop.v1i128"] ++ fn ctpop_u128x1(x: u128x1) -> u128x1; ++ #[link_name = "llvm.ctpop.v2i128"] ++ fn ctpop_u128x2(x: u128x2) -> u128x2; ++ #[link_name = "llvm.ctpop.v4i128"] ++ fn ctpop_u128x4(x: u128x4) -> u128x4; ++} ++ ++crate trait BitManip { ++ fn ctpop(self) -> Self; ++ fn ctlz(self) -> Self; ++ fn cttz(self) -> Self; ++} ++ ++macro_rules! 
impl_bit_manip { ++ (inner: $ty:ident, $scalar:ty, $uty:ident, ++ $ctpop:ident, $ctlz:ident, $cttz:ident) => { ++ // FIXME: several LLVM intrinsics break on s390x https://github.com/rust-lang-nursery/packed_simd/issues/192 ++ #[cfg(target_arch = "s390x")] ++ impl_bit_manip! { scalar: $ty, $scalar } ++ #[cfg(not(target_arch = "s390x"))] ++ impl BitManip for $ty { ++ #[inline] ++ fn ctpop(self) -> Self { ++ let y: $uty = self.cast(); ++ unsafe { $ctpop(y).cast() } ++ } ++ ++ #[inline] ++ fn ctlz(self) -> Self { ++ let y: $uty = self.cast(); ++ // the ctxx intrinsics need compile-time constant ++ // `is_zero_undef` ++ unsafe { $ctlz(y, false).cast() } ++ } ++ ++ #[inline] ++ fn cttz(self) -> Self { ++ let y: $uty = self.cast(); ++ unsafe { $cttz(y, false).cast() } ++ } ++ } ++ }; ++ (sized_inner: $ty:ident, $scalar:ty, $uty:ident) => { ++ #[cfg(target_arch = "s390x")] ++ impl_bit_manip! { scalar: $ty, $scalar } ++ #[cfg(not(target_arch = "s390x"))] ++ impl BitManip for $ty { ++ #[inline] ++ fn ctpop(self) -> Self { ++ let y: $uty = self.cast(); ++ $uty::ctpop(y).cast() ++ } ++ ++ #[inline] ++ fn ctlz(self) -> Self { ++ let y: $uty = self.cast(); ++ $uty::ctlz(y).cast() ++ } ++ ++ #[inline] ++ fn cttz(self) -> Self { ++ let y: $uty = self.cast(); ++ $uty::cttz(y).cast() ++ } ++ } ++ }; ++ (scalar: $ty:ident, $scalar:ty) => { ++ impl BitManip for $ty { ++ #[inline] ++ fn ctpop(self) -> Self { ++ let mut ones = self; ++ for i in 0..Self::lanes() { ++ ones = ones ++ .replace(i, self.extract(i).count_ones() as $scalar); ++ } ++ ones ++ } ++ ++ #[inline] ++ fn ctlz(self) -> Self { ++ let mut lz = self; ++ for i in 0..Self::lanes() { ++ lz = lz.replace( ++ i, ++ self.extract(i).leading_zeros() as $scalar, ++ ); ++ } ++ lz ++ } ++ ++ #[inline] ++ fn cttz(self) -> Self { ++ let mut tz = self; ++ for i in 0..Self::lanes() { ++ tz = tz.replace( ++ i, ++ self.extract(i).trailing_zeros() as $scalar, ++ ); ++ } ++ tz ++ } ++ } ++ }; ++ ($uty:ident, $uscalar:ty, $ity:ident, 
$iscalar:ty, ++ $ctpop:ident, $ctlz:ident, $cttz:ident) => { ++ impl_bit_manip! { inner: $uty, $uscalar, $uty, $ctpop, $ctlz, $cttz } ++ impl_bit_manip! { inner: $ity, $iscalar, $uty, $ctpop, $ctlz, $cttz } ++ }; ++ (sized: $usize:ident, $uscalar:ty, $isize:ident, ++ $iscalar:ty, $ty:ident) => { ++ impl_bit_manip! { sized_inner: $usize, $uscalar, $ty } ++ impl_bit_manip! { sized_inner: $isize, $iscalar, $ty } ++ }; ++} ++ ++impl_bit_manip! { u8x2 , u8, i8x2, i8, ctpop_u8x2, ctlz_u8x2, cttz_u8x2 } ++impl_bit_manip! { u8x4 , u8, i8x4, i8, ctpop_u8x4, ctlz_u8x4, cttz_u8x4 } ++#[cfg(not(target_arch = "aarch64"))] // see below ++impl_bit_manip! { u8x8 , u8, i8x8, i8, ctpop_u8x8, ctlz_u8x8, cttz_u8x8 } ++impl_bit_manip! { u8x16 , u8, i8x16, i8, ctpop_u8x16, ctlz_u8x16, cttz_u8x16 } ++impl_bit_manip! { u8x32 , u8, i8x32, i8, ctpop_u8x32, ctlz_u8x32, cttz_u8x32 } ++impl_bit_manip! { u8x64 , u8, i8x64, i8, ctpop_u8x64, ctlz_u8x64, cttz_u8x64 } ++impl_bit_manip! { u16x2 , u16, i16x2, i16, ctpop_u16x2, ctlz_u16x2, cttz_u16x2 } ++impl_bit_manip! { u16x4 , u16, i16x4, i16, ctpop_u16x4, ctlz_u16x4, cttz_u16x4 } ++impl_bit_manip! { u16x8 , u16, i16x8, i16, ctpop_u16x8, ctlz_u16x8, cttz_u16x8 } ++impl_bit_manip! { u16x16 , u16, i16x16, i16, ctpop_u16x16, ctlz_u16x16, cttz_u16x16 } ++impl_bit_manip! { u16x32 , u16, i16x32, i16, ctpop_u16x32, ctlz_u16x32, cttz_u16x32 } ++impl_bit_manip! { u32x2 , u32, i32x2, i32, ctpop_u32x2, ctlz_u32x2, cttz_u32x2 } ++impl_bit_manip! { u32x4 , u32, i32x4, i32, ctpop_u32x4, ctlz_u32x4, cttz_u32x4 } ++impl_bit_manip! { u32x8 , u32, i32x8, i32, ctpop_u32x8, ctlz_u32x8, cttz_u32x8 } ++impl_bit_manip! { u32x16 , u32, i32x16, i32, ctpop_u32x16, ctlz_u32x16, cttz_u32x16 } ++impl_bit_manip! { u64x2 , u64, i64x2, i64, ctpop_u64x2, ctlz_u64x2, cttz_u64x2 } ++impl_bit_manip! { u64x4 , u64, i64x4, i64, ctpop_u64x4, ctlz_u64x4, cttz_u64x4 } ++impl_bit_manip! { u64x8 , u64, i64x8, i64, ctpop_u64x8, ctlz_u64x8, cttz_u64x8 } ++impl_bit_manip! 
{ u128x1 , u128, i128x1, i128, ctpop_u128x1, ctlz_u128x1, cttz_u128x1 } ++impl_bit_manip! { u128x2 , u128, i128x2, i128, ctpop_u128x2, ctlz_u128x2, cttz_u128x2 } ++impl_bit_manip! { u128x4 , u128, i128x4, i128, ctpop_u128x4, ctlz_u128x4, cttz_u128x4 } ++ ++#[cfg(target_arch = "aarch64")] ++impl BitManip for u8x8 { ++ #[inline] ++ fn ctpop(self) -> Self { ++ let y: u8x8 = self.cast(); ++ unsafe { ctpop_u8x8(y).cast() } ++ } ++ ++ #[inline] ++ fn ctlz(self) -> Self { ++ let y: u8x8 = self.cast(); ++ unsafe { ctlz_u8x8(y, false).cast() } ++ } ++ ++ #[inline] ++ fn cttz(self) -> Self { ++ // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191 ++ // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 ++ // intrinsics ++ let mut tz = self; ++ for i in 0..Self::lanes() { ++ tz = tz.replace(i, self.extract(i).trailing_zeros() as u8); ++ } ++ tz ++ } ++} ++#[cfg(target_arch = "aarch64")] ++impl BitManip for i8x8 { ++ #[inline] ++ fn ctpop(self) -> Self { ++ let y: u8x8 = self.cast(); ++ unsafe { ctpop_u8x8(y).cast() } ++ } ++ ++ #[inline] ++ fn ctlz(self) -> Self { ++ let y: u8x8 = self.cast(); ++ unsafe { ctlz_u8x8(y, false).cast() } ++ } ++ ++ #[inline] ++ fn cttz(self) -> Self { ++ // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191 ++ // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 ++ // intrinsics ++ let mut tz = self; ++ for i in 0..Self::lanes() { ++ tz = tz.replace(i, self.extract(i).trailing_zeros() as i8); ++ } ++ tz ++ } ++} ++ ++cfg_if! { ++ if #[cfg(target_pointer_width = "8")] { ++ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u8x2 } ++ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u8x4 } ++ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u8x8 } ++ } else if #[cfg(target_pointer_width = "16")] { ++ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u16x2 } ++ impl_bit_manip! 
{ sized: usizex4, usize, isizex4, isize, u16x4 } ++ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u16x8 } ++ } else if #[cfg(target_pointer_width = "32")] { ++ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u32x2 } ++ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u32x4 } ++ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u32x8 } ++ } else if #[cfg(target_pointer_width = "64")] { ++ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u64x2 } ++ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u64x4 } ++ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u64x8 } ++ } else { ++ compile_error!("unsupported target_pointer_width"); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/llvm.rs b/third_party/rust/packed_simd/src/codegen/llvm.rs +new file mode 100644 +index 000000000000..91c2b0758dcf +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/llvm.rs +@@ -0,0 +1,99 @@ ++//! LLVM's platform intrinsics ++#![allow(dead_code)] ++ ++use crate::sealed::Shuffle; ++#[allow(unused_imports)] // FIXME: spurious warning? ++use crate::sealed::Simd; ++ ++// Shuffle intrinsics: expanded in users' crates, therefore public. ++extern "platform-intrinsic" { ++ // FIXME: Passing this intrinsics an `idx` array with an index that is ++ // out-of-bounds will produce a monomorphization-time error. 
++ // https://github.com/rust-lang-nursery/packed_simd/issues/21 ++ pub fn simd_shuffle2(x: T, y: T, idx: [u32; 2]) -> U ++ where ++ T: Simd, ++ ::Element: Shuffle<[u32; 2], Output = U>; ++ ++ pub fn simd_shuffle4(x: T, y: T, idx: [u32; 4]) -> U ++ where ++ T: Simd, ++ ::Element: Shuffle<[u32; 4], Output = U>; ++ ++ pub fn simd_shuffle8(x: T, y: T, idx: [u32; 8]) -> U ++ where ++ T: Simd, ++ ::Element: Shuffle<[u32; 8], Output = U>; ++ ++ pub fn simd_shuffle16(x: T, y: T, idx: [u32; 16]) -> U ++ where ++ T: Simd, ++ ::Element: Shuffle<[u32; 16], Output = U>; ++ ++ pub fn simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U ++ where ++ T: Simd, ++ ::Element: Shuffle<[u32; 32], Output = U>; ++ ++ pub fn simd_shuffle64(x: T, y: T, idx: [u32; 64]) -> U ++ where ++ T: Simd, ++ ::Element: Shuffle<[u32; 64], Output = U>; ++} ++ ++pub use self::simd_shuffle16 as __shuffle_vector16; ++pub use self::simd_shuffle2 as __shuffle_vector2; ++pub use self::simd_shuffle32 as __shuffle_vector32; ++pub use self::simd_shuffle4 as __shuffle_vector4; ++pub use self::simd_shuffle64 as __shuffle_vector64; ++pub use self::simd_shuffle8 as __shuffle_vector8; ++ ++extern "platform-intrinsic" { ++ crate fn simd_eq(x: T, y: T) -> U; ++ crate fn simd_ne(x: T, y: T) -> U; ++ crate fn simd_lt(x: T, y: T) -> U; ++ crate fn simd_le(x: T, y: T) -> U; ++ crate fn simd_gt(x: T, y: T) -> U; ++ crate fn simd_ge(x: T, y: T) -> U; ++ ++ crate fn simd_insert(x: T, idx: u32, val: U) -> T; ++ crate fn simd_extract(x: T, idx: u32) -> U; ++ ++ crate fn simd_cast(x: T) -> U; ++ ++ crate fn simd_add(x: T, y: T) -> T; ++ crate fn simd_sub(x: T, y: T) -> T; ++ crate fn simd_mul(x: T, y: T) -> T; ++ crate fn simd_div(x: T, y: T) -> T; ++ crate fn simd_rem(x: T, y: T) -> T; ++ crate fn simd_shl(x: T, y: T) -> T; ++ crate fn simd_shr(x: T, y: T) -> T; ++ crate fn simd_and(x: T, y: T) -> T; ++ crate fn simd_or(x: T, y: T) -> T; ++ crate fn simd_xor(x: T, y: T) -> T; ++ ++ crate fn simd_reduce_add_unordered(x: T) -> U; ++ 
crate fn simd_reduce_mul_unordered(x: T) -> U; ++ crate fn simd_reduce_add_ordered(x: T, acc: U) -> U; ++ crate fn simd_reduce_mul_ordered(x: T, acc: U) -> U; ++ crate fn simd_reduce_min(x: T) -> U; ++ crate fn simd_reduce_max(x: T) -> U; ++ crate fn simd_reduce_min_nanless(x: T) -> U; ++ crate fn simd_reduce_max_nanless(x: T) -> U; ++ crate fn simd_reduce_and(x: T) -> U; ++ crate fn simd_reduce_or(x: T) -> U; ++ crate fn simd_reduce_xor(x: T) -> U; ++ crate fn simd_reduce_all(x: T) -> bool; ++ crate fn simd_reduce_any(x: T) -> bool; ++ ++ crate fn simd_select(m: M, a: T, b: T) -> T; ++ ++ crate fn simd_fmin(a: T, b: T) -> T; ++ crate fn simd_fmax(a: T, b: T) -> T; ++ ++ crate fn simd_fsqrt(a: T) -> T; ++ crate fn simd_fma(a: T, b: T, c: T) -> T; ++ ++ crate fn simd_gather(value: T, pointers: P, mask: M) -> T; ++ crate fn simd_scatter(value: T, pointers: P, mask: M); ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math.rs b/third_party/rust/packed_simd/src/codegen/math.rs +new file mode 100644 +index 000000000000..f3997c7f1135 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math.rs +@@ -0,0 +1,3 @@ ++//! Vertical math operations ++ ++crate mod float; +diff --git a/third_party/rust/packed_simd/src/codegen/math/float.rs b/third_party/rust/packed_simd/src/codegen/math/float.rs +new file mode 100644 +index 000000000000..5e89bf6ae6b0 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float.rs +@@ -0,0 +1,18 @@ ++//! Vertical floating-point math operations. 
++#![allow(clippy::useless_transmute)] ++ ++#[macro_use] ++crate mod macros; ++crate mod abs; ++crate mod cos; ++crate mod cos_pi; ++crate mod exp; ++crate mod ln; ++crate mod mul_add; ++crate mod mul_adde; ++crate mod powf; ++crate mod sin; ++crate mod sin_cos_pi; ++crate mod sin_pi; ++crate mod sqrt; ++crate mod sqrte; +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/abs.rs b/third_party/rust/packed_simd/src/codegen/math/float/abs.rs +new file mode 100644 +index 000000000000..bc4421f61de2 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/abs.rs +@@ -0,0 +1,103 @@ ++//! Vertical floating-point `fabs` ++#![allow(unused)] ++ ++// FIXME 64-bit 1 elem vectors fabs ++ ++use crate::*; ++ ++crate trait Abs { ++ fn abs(self) -> Self; ++} ++ ++#[allow(improper_ctypes)] ++extern "C" { ++ #[link_name = "llvm.fabs.v2f32"] ++ fn fabs_v2f32(x: f32x2) -> f32x2; ++ #[link_name = "llvm.fabs.v4f32"] ++ fn fabs_v4f32(x: f32x4) -> f32x4; ++ #[link_name = "llvm.fabs.v8f32"] ++ fn fabs_v8f32(x: f32x8) -> f32x8; ++ #[link_name = "llvm.fabs.v16f32"] ++ fn fabs_v16f32(x: f32x16) -> f32x16; ++ /* FIXME 64-bit fabsgle elem vectors ++ #[link_name = "llvm.fabs.v1f64"] ++ fn fabs_v1f64(x: f64x1) -> f64x1; ++ */ ++ #[link_name = "llvm.fabs.v2f64"] ++ fn fabs_v2f64(x: f64x2) -> f64x2; ++ #[link_name = "llvm.fabs.v4f64"] ++ fn fabs_v4f64(x: f64x4) -> f64x4; ++ #[link_name = "llvm.fabs.v8f64"] ++ fn fabs_v8f64(x: f64x8) -> f64x8; ++ ++ #[link_name = "llvm.fabs.f32"] ++ fn fabs_f32(x: f32) -> f32; ++ #[link_name = "llvm.fabs.f64"] ++ fn fabs_f64(x: f64) -> f64; ++} ++ ++gen_unary_impl_table!(Abs, abs); ++ ++cfg_if! 
{ ++ if #[cfg(target_arch = "s390x")] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 ++ impl_unary!(f32x2[f32; 2]: fabs_f32); ++ impl_unary!(f32x4[f32; 4]: fabs_f32); ++ impl_unary!(f32x8[f32; 8]: fabs_f32); ++ impl_unary!(f32x16[f32; 16]: fabs_f32); ++ ++ impl_unary!(f64x2[f64; 2]: fabs_f64); ++ impl_unary!(f64x4[f64; 4]: fabs_f64); ++ impl_unary!(f64x8[f64; 8]: fabs_f64); ++ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! { ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx2); ++ ++ impl_unary!(f32x4: Sleef_fabsf4_avx2128); ++ impl_unary!(f32x8: Sleef_fabsf8_avx2); ++ impl_unary!(f64x2: Sleef_fabsd2_avx2128); ++ impl_unary!(f64x4: Sleef_fabsd4_avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx); ++ ++ impl_unary!(f32x4: Sleef_fabsf4_sse4); ++ impl_unary!(f32x8: Sleef_fabsf8_avx); ++ impl_unary!(f64x2: Sleef_fabsd2_sse4); ++ impl_unary!(f64x4: Sleef_fabsd4_avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_fabsf4_sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_fabsd2_sse4); ++ ++ impl_unary!(f32x4: Sleef_fabsf4_sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_fabsf4_sse4); ++ impl_unary!(f64x2: Sleef_fabsd2_sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_fabsd2_sse4); ++ } else { ++ impl_unary!(f32x2[f32; 2]: fabs_f32); ++ impl_unary!(f32x16: fabs_v16f32); ++ impl_unary!(f64x8: fabs_v8f64); ++ ++ impl_unary!(f32x4: fabs_v4f32); ++ impl_unary!(f32x8: fabs_v8f32); ++ impl_unary!(f64x2: fabs_v2f64); ++ impl_unary!(f64x4: fabs_v4f64); ++ } ++ } ++ } else { ++ impl_unary!(f32x2[f32; 2]: 
fabs_f32); ++ impl_unary!(f32x4: fabs_v4f32); ++ impl_unary!(f32x8: fabs_v8f32); ++ impl_unary!(f32x16: fabs_v16f32); ++ ++ impl_unary!(f64x2: fabs_v2f64); ++ impl_unary!(f64x4: fabs_v4f64); ++ impl_unary!(f64x8: fabs_v8f64); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/cos.rs b/third_party/rust/packed_simd/src/codegen/math/float/cos.rs +new file mode 100644 +index 000000000000..50f6c16da255 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/cos.rs +@@ -0,0 +1,103 @@ ++//! Vertical floating-point `cos` ++#![allow(unused)] ++ ++// FIXME 64-bit 1 elem vector cos ++ ++use crate::*; ++ ++crate trait Cos { ++ fn cos(self) -> Self; ++} ++ ++#[allow(improper_ctypes)] ++extern "C" { ++ #[link_name = "llvm.cos.v2f32"] ++ fn cos_v2f32(x: f32x2) -> f32x2; ++ #[link_name = "llvm.cos.v4f32"] ++ fn cos_v4f32(x: f32x4) -> f32x4; ++ #[link_name = "llvm.cos.v8f32"] ++ fn cos_v8f32(x: f32x8) -> f32x8; ++ #[link_name = "llvm.cos.v16f32"] ++ fn cos_v16f32(x: f32x16) -> f32x16; ++ /* FIXME 64-bit cosgle elem vectors ++ #[link_name = "llvm.cos.v1f64"] ++ fn cos_v1f64(x: f64x1) -> f64x1; ++ */ ++ #[link_name = "llvm.cos.v2f64"] ++ fn cos_v2f64(x: f64x2) -> f64x2; ++ #[link_name = "llvm.cos.v4f64"] ++ fn cos_v4f64(x: f64x4) -> f64x4; ++ #[link_name = "llvm.cos.v8f64"] ++ fn cos_v8f64(x: f64x8) -> f64x8; ++ ++ #[link_name = "llvm.cos.f32"] ++ fn cos_f32(x: f32) -> f32; ++ #[link_name = "llvm.cos.f64"] ++ fn cos_f64(x: f64) -> f64; ++} ++ ++gen_unary_impl_table!(Cos, cos); ++ ++cfg_if! 
{ ++ if #[cfg(target_arch = "s390x")] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 ++ impl_unary!(f32x2[f32; 2]: cos_f32); ++ impl_unary!(f32x4[f32; 4]: cos_f32); ++ impl_unary!(f32x8[f32; 8]: cos_f32); ++ impl_unary!(f32x16[f32; 16]: cos_f32); ++ ++ impl_unary!(f64x2[f64; 2]: cos_f64); ++ impl_unary!(f64x4[f64; 4]: cos_f64); ++ impl_unary!(f64x8[f64; 8]: cos_f64); ++ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! { ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx2); ++ ++ impl_unary!(f32x4: Sleef_cosf4_u10avx2128); ++ impl_unary!(f32x8: Sleef_cosf8_u10avx2); ++ impl_unary!(f64x2: Sleef_cosd2_u10avx2128); ++ impl_unary!(f64x4: Sleef_cosd4_u10avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx); ++ ++ impl_unary!(f32x4: Sleef_cosf4_u10sse4); ++ impl_unary!(f32x8: Sleef_cosf8_u10avx); ++ impl_unary!(f64x2: Sleef_cosd2_u10sse4); ++ impl_unary!(f64x4: Sleef_cosd4_u10avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_cosf4_u10sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_cosd2_u10sse4); ++ ++ impl_unary!(f32x4: Sleef_cosf4_u10sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_cosf4_u10sse4); ++ impl_unary!(f64x2: Sleef_cosd2_u10sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_cosd2_u10sse4); ++ } else { ++ impl_unary!(f32x2[f32; 2]: cos_f32); ++ impl_unary!(f32x16: cos_v16f32); ++ impl_unary!(f64x8: cos_v8f64); ++ ++ impl_unary!(f32x4: cos_v4f32); ++ impl_unary!(f32x8: cos_v8f32); ++ impl_unary!(f64x2: cos_v2f64); ++ impl_unary!(f64x4: cos_v4f64); ++ } ++ } ++ } else { ++ 
impl_unary!(f32x2[f32; 2]: cos_f32); ++ impl_unary!(f32x4: cos_v4f32); ++ impl_unary!(f32x8: cos_v8f32); ++ impl_unary!(f32x16: cos_v16f32); ++ ++ impl_unary!(f64x2: cos_v2f64); ++ impl_unary!(f64x4: cos_v4f64); ++ impl_unary!(f64x8: cos_v8f64); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs +new file mode 100644 +index 000000000000..ebff5fd1c751 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs +@@ -0,0 +1,87 @@ ++//! Vertical floating-point `cos` ++#![allow(unused)] ++ ++// FIXME 64-bit 1 elem vectors cos_pi ++ ++use crate::*; ++ ++crate trait CosPi { ++ fn cos_pi(self) -> Self; ++} ++ ++gen_unary_impl_table!(CosPi, cos_pi); ++ ++macro_rules! impl_def { ++ ($vid:ident, $PI:path) => { ++ impl CosPi for $vid { ++ #[inline] ++ fn cos_pi(self) -> Self { ++ (self * Self::splat($PI)).cos() ++ } ++ } ++ }; ++} ++macro_rules! impl_def32 { ++ ($vid:ident) => { ++ impl_def!($vid, crate::f32::consts::PI); ++ }; ++} ++macro_rules! impl_def64 { ++ ($vid:ident) => { ++ impl_def!($vid, crate::f64::consts::PI); ++ }; ++} ++ ++cfg_if! { ++ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! 
{ ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx2); ++ ++ impl_unary!(f32x4: Sleef_cospif4_u05avx2128); ++ impl_unary!(f32x8: Sleef_cospif8_u05avx2); ++ impl_unary!(f64x2: Sleef_cospid2_u05avx2128); ++ impl_unary!(f64x4: Sleef_cospid4_u05avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx); ++ ++ impl_unary!(f32x4: Sleef_cospif4_u05sse4); ++ impl_unary!(f32x8: Sleef_cospif8_u05avx); ++ impl_unary!(f64x2: Sleef_cospid2_u05sse4); ++ impl_unary!(f64x4: Sleef_cospid4_u05avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_cospif4_u05sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_cospid2_u05sse4); ++ ++ impl_unary!(f32x4: Sleef_cospif4_u05sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_cospif4_u05sse4); ++ impl_unary!(f64x2: Sleef_cospid2_u05sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_cospid2_u05sse4); ++ } else { ++ impl_def32!(f32x2); ++ impl_def32!(f32x4); ++ impl_def32!(f32x8); ++ impl_def32!(f32x16); ++ ++ impl_def64!(f64x2); ++ impl_def64!(f64x4); ++ impl_def64!(f64x8); ++ } ++ } ++ } else { ++ impl_def32!(f32x2); ++ impl_def32!(f32x4); ++ impl_def32!(f32x8); ++ impl_def32!(f32x16); ++ ++ impl_def64!(f64x2); ++ impl_def64!(f64x4); ++ impl_def64!(f64x8); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/exp.rs b/third_party/rust/packed_simd/src/codegen/math/float/exp.rs +new file mode 100644 +index 000000000000..00d10e9fa644 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/exp.rs +@@ -0,0 +1,112 @@ ++//! 
Vertical floating-point `exp` ++#![allow(unused)] ++ ++// FIXME 64-bit expgle elem vectors misexpg ++ ++use crate::*; ++ ++crate trait Exp { ++ fn exp(self) -> Self; ++} ++ ++#[allow(improper_ctypes)] ++extern "C" { ++ #[link_name = "llvm.exp.v2f32"] ++ fn exp_v2f32(x: f32x2) -> f32x2; ++ #[link_name = "llvm.exp.v4f32"] ++ fn exp_v4f32(x: f32x4) -> f32x4; ++ #[link_name = "llvm.exp.v8f32"] ++ fn exp_v8f32(x: f32x8) -> f32x8; ++ #[link_name = "llvm.exp.v16f32"] ++ fn exp_v16f32(x: f32x16) -> f32x16; ++ /* FIXME 64-bit expgle elem vectors ++ #[link_name = "llvm.exp.v1f64"] ++ fn exp_v1f64(x: f64x1) -> f64x1; ++ */ ++ #[link_name = "llvm.exp.v2f64"] ++ fn exp_v2f64(x: f64x2) -> f64x2; ++ #[link_name = "llvm.exp.v4f64"] ++ fn exp_v4f64(x: f64x4) -> f64x4; ++ #[link_name = "llvm.exp.v8f64"] ++ fn exp_v8f64(x: f64x8) -> f64x8; ++ ++ #[link_name = "llvm.exp.f32"] ++ fn exp_f32(x: f32) -> f32; ++ #[link_name = "llvm.exp.f64"] ++ fn exp_f64(x: f64) -> f64; ++} ++ ++gen_unary_impl_table!(Exp, exp); ++ ++cfg_if! { ++ if #[cfg(target_arch = "s390x")] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 ++ impl_unary!(f32x2[f32; 2]: exp_f32); ++ impl_unary!(f32x4[f32; 4]: exp_f32); ++ impl_unary!(f32x8[f32; 8]: exp_f32); ++ impl_unary!(f32x16[f32; 16]: exp_f32); ++ ++ impl_unary!(f64x2[f64; 2]: exp_f64); ++ impl_unary!(f64x4[f64; 4]: exp_f64); ++ impl_unary!(f64x8[f64; 8]: exp_f64); ++ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! 
{ ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx2); ++ ++ impl_unary!(f32x4: Sleef_expf4_u10avx2128); ++ impl_unary!(f32x8: Sleef_expf8_u10avx2); ++ impl_unary!(f64x2: Sleef_expd2_u10avx2128); ++ impl_unary!(f64x4: Sleef_expd4_u10avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx); ++ ++ impl_unary!(f32x4: Sleef_expf4_u10sse4); ++ impl_unary!(f32x8: Sleef_expf8_u10avx); ++ impl_unary!(f64x2: Sleef_expd2_u10sse4); ++ impl_unary!(f64x4: Sleef_expd4_u10avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse4); ++ ++ impl_unary!(f32x4: Sleef_expf4_u10sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse4); ++ impl_unary!(f64x2: Sleef_expd2_u10sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse4); ++ } else if #[cfg(target_feature = "sse2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse2); ++ impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse2); ++ impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse2); ++ ++ impl_unary!(f32x4: Sleef_expf4_u10sse2); ++ impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse2); ++ impl_unary!(f64x2: Sleef_expd2_u10sse2); ++ impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse2); ++ } else { ++ impl_unary!(f32x2[f32; 2]: exp_f32); ++ impl_unary!(f32x16: exp_v16f32); ++ impl_unary!(f64x8: exp_v8f64); ++ ++ impl_unary!(f32x4: exp_v4f32); ++ impl_unary!(f32x8: exp_v8f32); ++ impl_unary!(f64x2: exp_v2f64); ++ impl_unary!(f64x4: exp_v4f64); ++ } ++ } ++ } else { ++ impl_unary!(f32x2[f32; 2]: exp_f32); ++ impl_unary!(f32x4: exp_v4f32); ++ 
impl_unary!(f32x8: exp_v8f32); ++ impl_unary!(f32x16: exp_v16f32); ++ ++ impl_unary!(f64x2: exp_v2f64); ++ impl_unary!(f64x4: exp_v4f64); ++ impl_unary!(f64x8: exp_v8f64); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/ln.rs b/third_party/rust/packed_simd/src/codegen/math/float/ln.rs +new file mode 100644 +index 000000000000..88a5a6c6c158 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/ln.rs +@@ -0,0 +1,112 @@ ++//! Vertical floating-point `ln` ++#![allow(unused)] ++ ++// FIXME 64-bit lngle elem vectors mislng ++ ++use crate::*; ++ ++crate trait Ln { ++ fn ln(self) -> Self; ++} ++ ++#[allow(improper_ctypes)] ++extern "C" { ++ #[link_name = "llvm.log.v2f32"] ++ fn ln_v2f32(x: f32x2) -> f32x2; ++ #[link_name = "llvm.log.v4f32"] ++ fn ln_v4f32(x: f32x4) -> f32x4; ++ #[link_name = "llvm.log.v8f32"] ++ fn ln_v8f32(x: f32x8) -> f32x8; ++ #[link_name = "llvm.log.v16f32"] ++ fn ln_v16f32(x: f32x16) -> f32x16; ++ /* FIXME 64-bit lngle elem vectors ++ #[link_name = "llvm.log.v1f64"] ++ fn ln_v1f64(x: f64x1) -> f64x1; ++ */ ++ #[link_name = "llvm.log.v2f64"] ++ fn ln_v2f64(x: f64x2) -> f64x2; ++ #[link_name = "llvm.log.v4f64"] ++ fn ln_v4f64(x: f64x4) -> f64x4; ++ #[link_name = "llvm.log.v8f64"] ++ fn ln_v8f64(x: f64x8) -> f64x8; ++ ++ #[link_name = "llvm.log.f32"] ++ fn ln_f32(x: f32) -> f32; ++ #[link_name = "llvm.log.f64"] ++ fn ln_f64(x: f64) -> f64; ++} ++ ++gen_unary_impl_table!(Ln, ln); ++ ++cfg_if! { ++ if #[cfg(target_arch = "s390x")] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 ++ impl_unary!(f32x2[f32; 2]: ln_f32); ++ impl_unary!(f32x4[f32; 4]: ln_f32); ++ impl_unary!(f32x8[f32; 8]: ln_f32); ++ impl_unary!(f32x16[f32; 16]: ln_f32); ++ ++ impl_unary!(f64x2[f64; 2]: ln_f64); ++ impl_unary!(f64x4[f64; 4]: ln_f64); ++ impl_unary!(f64x8[f64; 8]: ln_f64); ++ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! 
{ ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx2); ++ ++ impl_unary!(f32x4: Sleef_logf4_u10avx2128); ++ impl_unary!(f32x8: Sleef_logf8_u10avx2); ++ impl_unary!(f64x2: Sleef_logd2_u10avx2128); ++ impl_unary!(f64x4: Sleef_logd4_u10avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx); ++ ++ impl_unary!(f32x4: Sleef_logf4_u10sse4); ++ impl_unary!(f32x8: Sleef_logf8_u10avx); ++ impl_unary!(f64x2: Sleef_logd2_u10sse4); ++ impl_unary!(f64x4: Sleef_logd4_u10avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse4); ++ ++ impl_unary!(f32x4: Sleef_logf4_u10sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse4); ++ impl_unary!(f64x2: Sleef_logd2_u10sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse4); ++ } else if #[cfg(target_feature = "sse2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse2); ++ impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse2); ++ impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse2); ++ ++ impl_unary!(f32x4: Sleef_logf4_u10sse2); ++ impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse2); ++ impl_unary!(f64x2: Sleef_logd2_u10sse2); ++ impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse2); ++ } else { ++ impl_unary!(f32x2[f32; 2]: ln_f32); ++ impl_unary!(f32x16: ln_v16f32); ++ impl_unary!(f64x8: ln_v8f64); ++ ++ impl_unary!(f32x4: ln_v4f32); ++ impl_unary!(f32x8: ln_v8f32); ++ impl_unary!(f64x2: ln_v2f64); ++ impl_unary!(f64x4: ln_v4f64); ++ } ++ } ++ } else { ++ impl_unary!(f32x2[f32; 2]: ln_f32); ++ impl_unary!(f32x4: ln_v4f32); ++ impl_unary!(f32x8: 
ln_v8f32); ++ impl_unary!(f32x16: ln_v16f32); ++ ++ impl_unary!(f64x2: ln_v2f64); ++ impl_unary!(f64x4: ln_v4f64); ++ impl_unary!(f64x8: ln_v8f64); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/macros.rs b/third_party/rust/packed_simd/src/codegen/math/float/macros.rs +new file mode 100644 +index 000000000000..02d0ca3f5c7a +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/macros.rs +@@ -0,0 +1,559 @@ ++//! Utility macros ++#![allow(unused)] ++ ++ ++macro_rules! impl_unary_ { ++ // implementation mapping 1:1 ++ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ transmute($fun(transmute(self))) ++ } ++ } ++ } ++ }; ++ // implementation mapping 1:1 for when `$fun` is a generic function ++ // like some of the fp math rustc intrinsics (e.g. `fn fun(x: T) -> T`). ++ (gen | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ transmute($fun(self.0)) ++ } ++ } ++ } ++ }; ++ (scalar | $trait_id:ident, $trait_method:ident, ++ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self) -> Self { ++ unsafe { ++ union U { ++ vec: $vec_id, ++ scalars: [$sid; $scount], ++ } ++ let mut scalars = U { vec: self }.scalars; ++ for i in &mut scalars { ++ *i = $fun(*i); ++ } ++ U { scalars }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun twice on each of the vector halves: ++ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $vech_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ union U { ++ vec: $vec_id, ++ halves: [$vech_id; 2], ++ } ++ ++ 
let mut halves = U { vec: self }.halves; ++ ++ *halves.get_unchecked_mut(0) = ++ transmute($fun(transmute(*halves.get_unchecked(0)))); ++ *halves.get_unchecked_mut(1) = ++ transmute($fun(transmute(*halves.get_unchecked(1)))); ++ ++ U { halves }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun four times on each of the vector quarters: ++ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $vecq_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ union U { ++ vec: $vec_id, ++ quarters: [$vecq_id; 4], ++ } ++ ++ let mut quarters = U { vec: self }.quarters; ++ ++ *quarters.get_unchecked_mut(0) = ++ transmute($fun(transmute(*quarters.get_unchecked(0)))); ++ *quarters.get_unchecked_mut(1) = ++ transmute($fun(transmute(*quarters.get_unchecked(1)))); ++ *quarters.get_unchecked_mut(2) = ++ transmute($fun(transmute(*quarters.get_unchecked(2)))); ++ *quarters.get_unchecked_mut(3) = ++ transmute($fun(transmute(*quarters.get_unchecked(3)))); ++ ++ U { quarters }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun once on a vector twice as large: ++ (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $vect_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self) -> Self { ++ unsafe { ++ use crate::mem::{transmute, uninitialized}; ++ ++ union U { ++ vec: [$vec_id; 2], ++ twice: $vect_id, ++ } ++ ++ let twice = U { vec: [self, uninitialized()] }.twice; ++ let twice = transmute($fun(transmute(twice))); ++ ++ *(U { twice }.vec.get_unchecked(0)) ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! gen_unary_impl_table { ++ ($trait_id:ident, $trait_method:ident) => { ++ macro_rules! 
impl_unary { ++ ($vid:ident: $fun:ident) => { ++ impl_unary_!(vec | $trait_id, $trait_method, $vid, $fun); ++ }; ++ ($vid:ident[g]: $fun:ident) => { ++ impl_unary_!(gen | $trait_id, $trait_method, $vid, $fun); ++ }; ++ ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { ++ impl_unary_!( ++ scalar | $trait_id, ++ $trait_method, ++ $vid, ++ [$sid; $sc], ++ $fun ++ ); ++ }; ++ ($vid:ident[s]: $fun:ident) => { ++ impl_unary_!(scalar | $trait_id, $trait_method, $vid, $fun); ++ }; ++ ($vid:ident[h => $vid_h:ident]: $fun:ident) => { ++ impl_unary_!( ++ halves | $trait_id, ++ $trait_method, ++ $vid, ++ $vid_h, ++ $fun ++ ); ++ }; ++ ($vid:ident[q => $vid_q:ident]: $fun:ident) => { ++ impl_unary_!( ++ quarter | $trait_id, ++ $trait_method, ++ $vid, ++ $vid_q, ++ $fun ++ ); ++ }; ++ ($vid:ident[t => $vid_t:ident]: $fun:ident) => { ++ impl_unary_!( ++ twice | $trait_id, ++ $trait_method, ++ $vid, ++ $vid_t, ++ $fun ++ ); ++ }; ++ } ++ }; ++} ++ ++macro_rules! impl_tertiary_ { ++ // implementation mapping 1:1 ++ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self, z: Self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ transmute($fun( ++ transmute(self), ++ transmute(y), ++ transmute(z), ++ )) ++ } ++ } ++ } ++ }; ++ (scalar | $trait_id:ident, $trait_method:ident, ++ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self, z: Self) -> Self { ++ unsafe { ++ union U { ++ vec: $vec_id, ++ scalars: [$sid; $scount], ++ } ++ let mut x = U { vec: self }.scalars; ++ let y = U { vec: y }.scalars; ++ let z = U { vec: z }.scalars; ++ for (x, (y, z)) in (&mut scalars).zip(&y).zip(&z) { ++ *i = $fun(*i, *y, *z); ++ } ++ U { vec: x }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun twice on each of the vector halves: ++ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ 
$vech_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self, z: Self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ union U { ++ vec: $vec_id, ++ halves: [$vech_id; 2], ++ } ++ ++ let mut x_halves = U { vec: self }.halves; ++ let y_halves = U { vec: y }.halves; ++ let z_halves = U { vec: z }.halves; ++ ++ *x_halves.get_unchecked_mut(0) = transmute($fun( ++ transmute(*x_halves.get_unchecked(0)), ++ transmute(*y_halves.get_unchecked(0)), ++ transmute(*z_halves.get_unchecked(0)), ++ )); ++ *x_halves.get_unchecked_mut(1) = transmute($fun( ++ transmute(*x_halves.get_unchecked(1)), ++ transmute(*y_halves.get_unchecked(1)), ++ transmute(*z_halves.get_unchecked(1)), ++ )); ++ ++ U { halves: x_halves }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun four times on each of the vector quarters: ++ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $vecq_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self, z: Self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ union U { ++ vec: $vec_id, ++ quarters: [$vecq_id; 4], ++ } ++ ++ let mut x_quarters = U { vec: self }.quarters; ++ let y_quarters = U { vec: y }.quarters; ++ let z_quarters = U { vec: z }.quarters; ++ ++ *x_quarters.get_unchecked_mut(0) = transmute($fun( ++ transmute(*x_quarters.get_unchecked(0)), ++ transmute(*y_quarters.get_unchecked(0)), ++ transmute(*z_quarters.get_unchecked(0)), ++ )); ++ ++ *x_quarters.get_unchecked_mut(1) = transmute($fun( ++ transmute(*x_quarters.get_unchecked(1)), ++ transmute(*y_quarters.get_unchecked(1)), ++ transmute(*z_quarters.get_unchecked(1)), ++ )); ++ ++ *x_quarters.get_unchecked_mut(2) = transmute($fun( ++ transmute(*x_quarters.get_unchecked(2)), ++ transmute(*y_quarters.get_unchecked(2)), ++ transmute(*z_quarters.get_unchecked(2)), ++ )); ++ ++ *x_quarters.get_unchecked_mut(3) = transmute($fun( ++ transmute(*x_quarters.get_unchecked(3)), 
++ transmute(*y_quarters.get_unchecked(3)), ++ transmute(*z_quarters.get_unchecked(3)), ++ )); ++ ++ U { quarters: x_quarters }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun once on a vector twice as large: ++ (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $vect_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self, z: Self) -> Self { ++ unsafe { ++ use crate::mem::{transmute, uninitialized}; ++ ++ union U { ++ vec: [$vec_id; 2], ++ twice: $vect_id, ++ } ++ ++ let x_twice = U { vec: [self, uninitialized()] }.twice; ++ let y_twice = U { vec: [y, uninitialized()] }.twice; ++ let z_twice = U { vec: [z, uninitialized()] }.twice; ++ let twice: $vect_id = transmute($fun( ++ transmute(x_twice), ++ transmute(y_twice), ++ transmute(z_twice), ++ )); ++ ++ *(U { twice }.vec.get_unchecked(0)) ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! gen_tertiary_impl_table { ++ ($trait_id:ident, $trait_method:ident) => { ++ macro_rules! impl_tertiary { ++ ($vid:ident: $fun:ident) => { ++ impl_tertiary_!(vec | $trait_id, $trait_method, $vid, $fun); ++ }; ++ ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { ++ impl_tertiary_!( ++ scalar | $trait_id, ++ $trait_method, ++ $vid, ++ [$sid; $sc], ++ $fun ++ ); ++ }; ++ ($vid:ident[s]: $fun:ident) => { ++ impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, $fun); ++ }; ++ ($vid:ident[h => $vid_h:ident]: $fun:ident) => { ++ impl_tertiary_!( ++ halves | $trait_id, ++ $trait_method, ++ $vid, ++ $vid_h, ++ $fun ++ ); ++ }; ++ ($vid:ident[q => $vid_q:ident]: $fun:ident) => { ++ impl_tertiary_!( ++ quarter | $trait_id, ++ $trait_method, ++ $vid, ++ $vid_q, ++ $fun ++ ); ++ }; ++ ($vid:ident[t => $vid_t:ident]: $fun:ident) => { ++ impl_tertiary_!( ++ twice | $trait_id, ++ $trait_method, ++ $vid, ++ $vid_t, ++ $fun ++ ); ++ }; ++ } ++ }; ++} ++ ++macro_rules! 
impl_binary_ { ++ // implementation mapping 1:1 ++ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ transmute($fun(transmute(self), transmute(y))) ++ } ++ } ++ } ++ }; ++ (scalar | $trait_id:ident, $trait_method:ident, ++ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self) -> Self { ++ unsafe { ++ union U { ++ vec: $vec_id, ++ scalars: [$sid; $scount], ++ } ++ let mut x = U { vec: self }.scalars; ++ let y = U { vec: y }.scalars; ++ for (x, y) in x.iter_mut().zip(&y) { ++ *x = $fun(*x, *y); ++ } ++ U { scalars: x }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun twice on each of the vector halves: ++ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $vech_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ union U { ++ vec: $vec_id, ++ halves: [$vech_id; 2], ++ } ++ ++ let mut x_halves = U { vec: self }.halves; ++ let y_halves = U { vec: y }.halves; ++ ++ *x_halves.get_unchecked_mut(0) = transmute($fun( ++ transmute(*x_halves.get_unchecked(0)), ++ transmute(*y_halves.get_unchecked(0)), ++ )); ++ *x_halves.get_unchecked_mut(1) = transmute($fun( ++ transmute(*x_halves.get_unchecked(1)), ++ transmute(*y_halves.get_unchecked(1)), ++ )); ++ ++ U { halves: x_halves }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun four times on each of the vector quarters: ++ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $vecq_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ union U { ++ vec: $vec_id, ++ quarters: [$vecq_id; 4], ++ } ++ ++ let mut x_quarters = U { 
vec: self }.quarters; ++ let y_quarters = U { vec: y }.quarters; ++ ++ *x_quarters.get_unchecked_mut(0) = transmute($fun( ++ transmute(*x_quarters.get_unchecked(0)), ++ transmute(*y_quarters.get_unchecked(0)), ++ )); ++ ++ *x_quarters.get_unchecked_mut(1) = transmute($fun( ++ transmute(*x_quarters.get_unchecked(1)), ++ transmute(*y_quarters.get_unchecked(1)), ++ )); ++ ++ *x_quarters.get_unchecked_mut(2) = transmute($fun( ++ transmute(*x_quarters.get_unchecked(2)), ++ transmute(*y_quarters.get_unchecked(2)), ++ )); ++ ++ *x_quarters.get_unchecked_mut(3) = transmute($fun( ++ transmute(*x_quarters.get_unchecked(3)), ++ transmute(*y_quarters.get_unchecked(3)), ++ )); ++ ++ U { quarters: x_quarters }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun once on a vector twice as large: ++ (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $vect_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self) -> Self { ++ unsafe { ++ use crate::mem::{transmute, uninitialized}; ++ ++ union U { ++ vec: [$vec_id; 2], ++ twice: $vect_id, ++ } ++ ++ let x_twice = U { vec: [self, uninitialized()] }.twice; ++ let y_twice = U { vec: [y, uninitialized()] }.twice; ++ let twice: $vect_id = transmute($fun( ++ transmute(x_twice), ++ transmute(y_twice), ++ )); ++ ++ *(U { twice }.vec.get_unchecked(0)) ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! gen_binary_impl_table { ++ ($trait_id:ident, $trait_method:ident) => { ++ macro_rules! 
impl_binary { ++ ($vid:ident: $fun:ident) => { ++ impl_binary_!(vec | $trait_id, $trait_method, $vid, $fun); ++ }; ++ ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { ++ impl_binary_!( ++ scalar | $trait_id, ++ $trait_method, ++ $vid, ++ [$sid; $sc], ++ $fun ++ ); ++ }; ++ ($vid:ident[s]: $fun:ident) => { ++ impl_binary_!(scalar | $trait_id, $trait_method, $vid, $fun); ++ }; ++ ($vid:ident[h => $vid_h:ident]: $fun:ident) => { ++ impl_binary_!( ++ halves | $trait_id, ++ $trait_method, ++ $vid, ++ $vid_h, ++ $fun ++ ); ++ }; ++ ($vid:ident[q => $vid_q:ident]: $fun:ident) => { ++ impl_binary_!( ++ quarter | $trait_id, ++ $trait_method, ++ $vid, ++ $vid_q, ++ $fun ++ ); ++ }; ++ ($vid:ident[t => $vid_t:ident]: $fun:ident) => { ++ impl_binary_!( ++ twice | $trait_id, ++ $trait_method, ++ $vid, ++ $vid_t, ++ $fun ++ ); ++ }; ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs b/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs +new file mode 100644 +index 000000000000..f48a57dc46c6 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs +@@ -0,0 +1,109 @@ ++//! 
Vertical floating-point `mul_add` ++#![allow(unused)] ++use crate::*; ++ ++// FIXME: 64-bit 1 element mul_add ++ ++crate trait MulAdd { ++ fn mul_add(self, y: Self, z: Self) -> Self; ++} ++ ++#[cfg(not(target_arch = "s390x"))] ++#[allow(improper_ctypes)] ++extern "C" { ++ #[link_name = "llvm.fma.v2f32"] ++ fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2; ++ #[link_name = "llvm.fma.v4f32"] ++ fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4; ++ #[link_name = "llvm.fma.v8f32"] ++ fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8; ++ #[link_name = "llvm.fma.v16f32"] ++ fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16; ++ /* FIXME 64-bit single elem vectors ++ #[link_name = "llvm.fma.v1f64"] ++ fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1; ++ */ ++ #[link_name = "llvm.fma.v2f64"] ++ fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2; ++ #[link_name = "llvm.fma.v4f64"] ++ fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4; ++ #[link_name = "llvm.fma.v8f64"] ++ fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8; ++} ++ ++gen_tertiary_impl_table!(MulAdd, mul_add); ++ ++cfg_if! { ++ if #[cfg(target_arch = "s390x")] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 ++ macro_rules! impl_broken { ++ ($id:ident) => { ++ impl MulAdd for $id { ++ #[inline] ++ fn mul_add(self, y: Self, z: Self) -> Self { ++ self * y + z ++ } ++ } ++ }; ++ } ++ ++ impl_broken!(f32x2); ++ impl_broken!(f32x4); ++ impl_broken!(f32x8); ++ impl_broken!(f32x16); ++ ++ impl_broken!(f64x2); ++ impl_broken!(f64x4); ++ impl_broken!(f64x8); ++ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! 
{ ++ if #[cfg(target_feature = "avx2")] { ++ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_avx2128); ++ impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx2); ++ impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx2); ++ ++ impl_tertiary!(f32x4: Sleef_fmaf4_avx2128); ++ impl_tertiary!(f32x8: Sleef_fmaf8_avx2); ++ impl_tertiary!(f64x2: Sleef_fmad2_avx2128); ++ impl_tertiary!(f64x4: Sleef_fmad4_avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4); ++ impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx); ++ impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx); ++ ++ impl_tertiary!(f32x4: Sleef_fmaf4_sse4); ++ impl_tertiary!(f32x8: Sleef_fmaf8_avx); ++ impl_tertiary!(f64x2: Sleef_fmad2_sse4); ++ impl_tertiary!(f64x4: Sleef_fmad4_avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4); ++ impl_tertiary!(f32x16[q => f32x4]: Sleef_fmaf4_sse4); ++ impl_tertiary!(f64x8[q => f64x2]: Sleef_fmad2_sse4); ++ ++ impl_tertiary!(f32x4: Sleef_fmaf4_sse4); ++ impl_tertiary!(f32x8[h => f32x4]: Sleef_fmaf4_sse4); ++ impl_tertiary!(f64x2: Sleef_fmad2_sse4); ++ impl_tertiary!(f64x4[h => f64x2]: Sleef_fmad2_sse4); ++ } else { ++ impl_tertiary!(f32x2: fma_v2f32); ++ impl_tertiary!(f32x16: fma_v16f32); ++ impl_tertiary!(f64x8: fma_v8f64); ++ ++ impl_tertiary!(f32x4: fma_v4f32); ++ impl_tertiary!(f32x8: fma_v8f32); ++ impl_tertiary!(f64x2: fma_v2f64); ++ impl_tertiary!(f64x4: fma_v4f64); ++ } ++ } ++ } else { ++ impl_tertiary!(f32x2: fma_v2f32); ++ impl_tertiary!(f32x4: fma_v4f32); ++ impl_tertiary!(f32x8: fma_v8f32); ++ impl_tertiary!(f32x16: fma_v16f32); ++ // impl_tertiary!(f64x1: fma_v1f64); // FIXME 64-bit fmagle elem vectors ++ impl_tertiary!(f64x2: fma_v2f64); ++ impl_tertiary!(f64x4: fma_v4f64); ++ impl_tertiary!(f64x8: fma_v8f64); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs b/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs 
+new file mode 100644 +index 000000000000..8c41fb131d94 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs +@@ -0,0 +1,66 @@ ++//! Approximation for floating-point `mul_add` ++use crate::*; ++ ++// FIXME: 64-bit 1 element mul_adde ++ ++crate trait MulAddE { ++ fn mul_adde(self, y: Self, z: Self) -> Self; ++} ++ ++#[cfg(not(target_arch = "s390x"))] ++#[allow(improper_ctypes)] ++extern "C" { ++ #[link_name = "llvm.fmuladd.v2f32"] ++ fn fmuladd_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2; ++ #[link_name = "llvm.fmuladd.v4f32"] ++ fn fmuladd_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4; ++ #[link_name = "llvm.fmuladd.v8f32"] ++ fn fmuladd_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8; ++ #[link_name = "llvm.fmuladd.v16f32"] ++ fn fmuladd_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16; ++ /* FIXME 64-bit single elem vectors ++ #[link_name = "llvm.fmuladd.v1f64"] ++ fn fmuladd_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1; ++ */ ++ #[link_name = "llvm.fmuladd.v2f64"] ++ fn fmuladd_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2; ++ #[link_name = "llvm.fmuladd.v4f64"] ++ fn fmuladd_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4; ++ #[link_name = "llvm.fmuladd.v8f64"] ++ fn fmuladd_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8; ++} ++ ++macro_rules! 
impl_mul_adde { ++ ($id:ident : $fn:ident) => { ++ impl MulAddE for $id { ++ #[inline] ++ fn mul_adde(self, y: Self, z: Self) -> Self { ++ #[cfg(not(target_arch = "s390x"))] ++ { ++ use crate::mem::transmute; ++ unsafe { ++ transmute($fn( ++ transmute(self), ++ transmute(y), ++ transmute(z), ++ )) ++ } ++ } ++ #[cfg(target_arch = "s390x")] ++ { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 ++ self * y + z ++ } ++ } ++ } ++ }; ++} ++ ++impl_mul_adde!(f32x2: fmuladd_v2f32); ++impl_mul_adde!(f32x4: fmuladd_v4f32); ++impl_mul_adde!(f32x8: fmuladd_v8f32); ++impl_mul_adde!(f32x16: fmuladd_v16f32); ++// impl_mul_adde!(f64x1: fma_v1f64); // FIXME 64-bit fmagle elem vectors ++impl_mul_adde!(f64x2: fmuladd_v2f64); ++impl_mul_adde!(f64x4: fmuladd_v4f64); ++impl_mul_adde!(f64x8: fmuladd_v8f64); +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/powf.rs b/third_party/rust/packed_simd/src/codegen/math/float/powf.rs +new file mode 100644 +index 000000000000..bc15067d73a3 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/powf.rs +@@ -0,0 +1,112 @@ ++//! 
Vertical floating-point `powf` ++#![allow(unused)] ++ ++// FIXME 64-bit powfgle elem vectors mispowfg ++ ++use crate::*; ++ ++crate trait Powf { ++ fn powf(self, x: Self) -> Self; ++} ++ ++#[allow(improper_ctypes)] ++extern "C" { ++ #[link_name = "llvm.pow.v2f32"] ++ fn powf_v2f32(x: f32x2, y: f32x2) -> f32x2; ++ #[link_name = "llvm.pow.v4f32"] ++ fn powf_v4f32(x: f32x4, y: f32x4) -> f32x4; ++ #[link_name = "llvm.pow.v8f32"] ++ fn powf_v8f32(x: f32x8, y: f32x8) -> f32x8; ++ #[link_name = "llvm.pow.v16f32"] ++ fn powf_v16f32(x: f32x16, y: f32x16) -> f32x16; ++ /* FIXME 64-bit powfgle elem vectors ++ #[link_name = "llvm.pow.v1f64"] ++ fn powf_v1f64(x: f64x1, y: f64x1) -> f64x1; ++ */ ++ #[link_name = "llvm.pow.v2f64"] ++ fn powf_v2f64(x: f64x2, y: f64x2) -> f64x2; ++ #[link_name = "llvm.pow.v4f64"] ++ fn powf_v4f64(x: f64x4, y: f64x4) -> f64x4; ++ #[link_name = "llvm.pow.v8f64"] ++ fn powf_v8f64(x: f64x8, y: f64x8) -> f64x8; ++ ++ #[link_name = "llvm.pow.f32"] ++ fn powf_f32(x: f32, y: f32) -> f32; ++ #[link_name = "llvm.pow.f64"] ++ fn powf_f64(x: f64, y: f64) -> f64; ++} ++ ++gen_binary_impl_table!(Powf, powf); ++ ++cfg_if! { ++ if #[cfg(target_arch = "s390x")] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 ++ impl_binary!(f32x2[f32; 2]: powf_f32); ++ impl_binary!(f32x4[f32; 4]: powf_f32); ++ impl_binary!(f32x8[f32; 8]: powf_f32); ++ impl_binary!(f32x16[f32; 16]: powf_f32); ++ ++ impl_binary!(f64x2[f64; 2]: powf_f64); ++ impl_binary!(f64x4[f64; 4]: powf_f64); ++ impl_binary!(f64x8[f64; 8]: powf_f64); ++ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! 
{ ++ if #[cfg(target_feature = "avx2")] { ++ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10avx2128); ++ impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx2); ++ impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx2); ++ ++ impl_binary!(f32x4: Sleef_powf4_u10avx2128); ++ impl_binary!(f32x8: Sleef_powf8_u10avx2); ++ impl_binary!(f64x2: Sleef_powd2_u10avx2128); ++ impl_binary!(f64x4: Sleef_powd4_u10avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4); ++ impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx); ++ impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx); ++ ++ impl_binary!(f32x4: Sleef_powf4_u10sse4); ++ impl_binary!(f32x8: Sleef_powf8_u10avx); ++ impl_binary!(f64x2: Sleef_powd2_u10sse4); ++ impl_binary!(f64x4: Sleef_powd4_u10avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4); ++ impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse4); ++ impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse4); ++ ++ impl_binary!(f32x4: Sleef_powf4_u10sse4); ++ impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse4); ++ impl_binary!(f64x2: Sleef_powd2_u10sse4); ++ impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse4); ++ } else if #[cfg(target_feature = "sse2")] { ++ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse2); ++ impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse2); ++ impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse2); ++ ++ impl_binary!(f32x4: Sleef_powf4_u10sse2); ++ impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse2); ++ impl_binary!(f64x2: Sleef_powd2_u10sse2); ++ impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse2); ++ } else { ++ impl_binary!(f32x2[f32; 2]: powf_f32); ++ impl_binary!(f32x4: powf_v4f32); ++ impl_binary!(f32x8: powf_v8f32); ++ impl_binary!(f32x16: powf_v16f32); ++ ++ impl_binary!(f64x2: powf_v2f64); ++ impl_binary!(f64x4: powf_v4f64); ++ impl_binary!(f64x8: powf_v8f64); ++ } ++ } ++ } else { ++ impl_binary!(f32x2[f32; 2]: powf_f32); ++ 
impl_binary!(f32x4: powf_v4f32); ++ impl_binary!(f32x8: powf_v8f32); ++ impl_binary!(f32x16: powf_v16f32); ++ ++ impl_binary!(f64x2: powf_v2f64); ++ impl_binary!(f64x4: powf_v4f64); ++ impl_binary!(f64x8: powf_v8f64); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin.rs +new file mode 100644 +index 000000000000..7b014d07da8d +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/sin.rs +@@ -0,0 +1,103 @@ ++//! Vertical floating-point `sin` ++#![allow(unused)] ++ ++// FIXME 64-bit 1 elem vectors sin ++ ++use crate::*; ++ ++crate trait Sin { ++ fn sin(self) -> Self; ++} ++ ++#[allow(improper_ctypes)] ++extern "C" { ++ #[link_name = "llvm.sin.v2f32"] ++ fn sin_v2f32(x: f32x2) -> f32x2; ++ #[link_name = "llvm.sin.v4f32"] ++ fn sin_v4f32(x: f32x4) -> f32x4; ++ #[link_name = "llvm.sin.v8f32"] ++ fn sin_v8f32(x: f32x8) -> f32x8; ++ #[link_name = "llvm.sin.v16f32"] ++ fn sin_v16f32(x: f32x16) -> f32x16; ++ /* FIXME 64-bit single elem vectors ++ #[link_name = "llvm.sin.v1f64"] ++ fn sin_v1f64(x: f64x1) -> f64x1; ++ */ ++ #[link_name = "llvm.sin.v2f64"] ++ fn sin_v2f64(x: f64x2) -> f64x2; ++ #[link_name = "llvm.sin.v4f64"] ++ fn sin_v4f64(x: f64x4) -> f64x4; ++ #[link_name = "llvm.sin.v8f64"] ++ fn sin_v8f64(x: f64x8) -> f64x8; ++ ++ #[link_name = "llvm.sin.f32"] ++ fn sin_f32(x: f32) -> f32; ++ #[link_name = "llvm.sin.f64"] ++ fn sin_f64(x: f64) -> f64; ++} ++ ++gen_unary_impl_table!(Sin, sin); ++ ++cfg_if! 
{ ++ if #[cfg(target_arch = "s390x")] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 ++ impl_unary!(f32x2[f32; 2]: sin_f32); ++ impl_unary!(f32x4[f32; 4]: sin_f32); ++ impl_unary!(f32x8[f32; 8]: sin_f32); ++ impl_unary!(f32x16[f32; 16]: sin_f32); ++ ++ impl_unary!(f64x2[f64; 2]: sin_f64); ++ impl_unary!(f64x4[f64; 4]: sin_f64); ++ impl_unary!(f64x8[f64; 8]: sin_f64); ++ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! { ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx2); ++ ++ impl_unary!(f32x4: Sleef_sinf4_u10avx2128); ++ impl_unary!(f32x8: Sleef_sinf8_u10avx2); ++ impl_unary!(f64x2: Sleef_sind2_u10avx2128); ++ impl_unary!(f64x4: Sleef_sind4_u10avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx); ++ ++ impl_unary!(f32x4: Sleef_sinf4_u10sse4); ++ impl_unary!(f32x8: Sleef_sinf8_u10avx); ++ impl_unary!(f64x2: Sleef_sind2_u10sse4); ++ impl_unary!(f64x4: Sleef_sind4_u10avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_sinf4_u10sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_sind2_u10sse4); ++ ++ impl_unary!(f32x4: Sleef_sinf4_u10sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_sinf4_u10sse4); ++ impl_unary!(f64x2: Sleef_sind2_u10sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_sind2_u10sse4); ++ } else { ++ impl_unary!(f32x2[f32; 2]: sin_f32); ++ impl_unary!(f32x16: sin_v16f32); ++ impl_unary!(f64x8: sin_v8f64); ++ ++ impl_unary!(f32x4: sin_v4f32); ++ impl_unary!(f32x8: sin_v8f32); ++ impl_unary!(f64x2: sin_v2f64); ++ impl_unary!(f64x4: sin_v4f64); ++ } ++ } ++ } else { ++ 
impl_unary!(f32x2[f32; 2]: sin_f32); ++ impl_unary!(f32x4: sin_v4f32); ++ impl_unary!(f32x8: sin_v8f32); ++ impl_unary!(f32x16: sin_v16f32); ++ ++ impl_unary!(f64x2: sin_v2f64); ++ impl_unary!(f64x4: sin_v4f64); ++ impl_unary!(f64x8: sin_v8f64); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs +new file mode 100644 +index 000000000000..0f1249ec88f0 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs +@@ -0,0 +1,195 @@ ++//! Vertical floating-point `sin_cos` ++#![allow(unused)] ++ ++// FIXME 64-bit 1 elem vectors sin_cos ++ ++use crate::*; ++ ++crate trait SinCosPi: Sized { ++ type Output; ++ fn sin_cos_pi(self) -> Self::Output; ++} ++ ++macro_rules! impl_def { ++ ($vid:ident, $PI:path) => { ++ impl SinCosPi for $vid { ++ type Output = (Self, Self); ++ #[inline] ++ fn sin_cos_pi(self) -> Self::Output { ++ let v = self * Self::splat($PI); ++ (v.sin(), v.cos()) ++ } ++ } ++ }; ++} ++ ++macro_rules! impl_def32 { ++ ($vid:ident) => { ++ impl_def!($vid, crate::f32::consts::PI); ++ }; ++} ++macro_rules! impl_def64 { ++ ($vid:ident) => { ++ impl_def!($vid, crate::f64::consts::PI); ++ }; ++} ++ ++macro_rules! 
impl_unary_t { ++ ($vid:ident: $fun:ident) => { ++ impl SinCosPi for $vid { ++ type Output = (Self, Self); ++ fn sin_cos_pi(self) -> Self::Output { ++ unsafe { ++ use crate::mem::transmute; ++ transmute($fun(transmute(self))) ++ } ++ } ++ } ++ }; ++ ($vid:ident[t => $vid_t:ident]: $fun:ident) => { ++ impl SinCosPi for $vid { ++ type Output = (Self, Self); ++ fn sin_cos_pi(self) -> Self::Output { ++ unsafe { ++ use crate::mem::{transmute, uninitialized}; ++ ++ union U { ++ vec: [$vid; 2], ++ twice: $vid_t, ++ } ++ ++ let twice = U { vec: [self, uninitialized()] }.twice; ++ let twice = transmute($fun(transmute(twice))); ++ ++ union R { ++ twice: ($vid_t, $vid_t), ++ vecs: ([$vid; 2], [$vid; 2]), ++ } ++ let r = R { twice }.vecs; ++ (*r.0.get_unchecked(0), *r.0.get_unchecked(1)) ++ } ++ } ++ } ++ }; ++ ($vid:ident[h => $vid_h:ident]: $fun:ident) => { ++ impl SinCosPi for $vid { ++ type Output = (Self, Self); ++ fn sin_cos_pi(self) -> Self::Output { ++ unsafe { ++ use crate::mem::transmute; ++ ++ union U { ++ vec: $vid, ++ halves: [$vid_h; 2], ++ } ++ ++ let halves = U { vec: self }.halves; ++ ++ let res_0: ($vid_h, $vid_h) = ++ transmute($fun(transmute(*halves.get_unchecked(0)))); ++ let res_1: ($vid_h, $vid_h) = ++ transmute($fun(transmute(*halves.get_unchecked(1)))); ++ ++ union R { ++ result: ($vid, $vid), ++ halves: ([$vid_h; 2], [$vid_h; 2]), ++ } ++ R { halves: ([res_0.0, res_1.0], [res_0.1, res_1.1]) } ++ .result ++ } ++ } ++ } ++ }; ++ ($vid:ident[q => $vid_q:ident]: $fun:ident) => { ++ impl SinCosPi for $vid { ++ type Output = (Self, Self); ++ fn sin_cos_pi(self) -> Self::Output { ++ unsafe { ++ use crate::mem::transmute; ++ ++ union U { ++ vec: $vid, ++ quarters: [$vid_q; 4], ++ } ++ ++ let quarters = U { vec: self }.quarters; ++ ++ let res_0: ($vid_q, $vid_q) = ++ transmute($fun(transmute(*quarters.get_unchecked(0)))); ++ let res_1: ($vid_q, $vid_q) = ++ transmute($fun(transmute(*quarters.get_unchecked(1)))); ++ let res_2: ($vid_q, $vid_q) = ++ 
transmute($fun(transmute(*quarters.get_unchecked(2)))); ++ let res_3: ($vid_q, $vid_q) = ++ transmute($fun(transmute(*quarters.get_unchecked(3)))); ++ ++ union R { ++ result: ($vid, $vid), ++ quarters: ([$vid_q; 4], [$vid_q; 4]), ++ } ++ R { ++ quarters: ( ++ [res_0.0, res_1.0, res_2.0, res_3.0], ++ [res_0.1, res_1.1, res_2.1, res_3.1], ++ ), ++ } ++ .result ++ } ++ } ++ } ++ }; ++} ++ ++cfg_if! { ++ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! { ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128); ++ impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2); ++ impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2); ++ ++ impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128); ++ impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2); ++ impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128); ++ impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4); ++ impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx); ++ impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx); ++ ++ impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4); ++ impl_unary_t!(f32x8: Sleef_sincospif8_u05avx); ++ impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4); ++ impl_unary_t!(f64x4: Sleef_sincospid4_u05avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4); ++ impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4); ++ impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4); ++ ++ impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4); ++ impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4); ++ impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4); ++ impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4); ++ } else { ++ impl_def32!(f32x2); ++ impl_def32!(f32x4); ++ impl_def32!(f32x8); ++ impl_def32!(f32x16); ++ ++ impl_def64!(f64x2); ++ 
impl_def64!(f64x4); ++ impl_def64!(f64x8); ++ } ++ } ++ } else { ++ impl_def32!(f32x2); ++ impl_def32!(f32x4); ++ impl_def32!(f32x8); ++ impl_def32!(f32x16); ++ ++ impl_def64!(f64x2); ++ impl_def64!(f64x4); ++ impl_def64!(f64x8); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs +new file mode 100644 +index 000000000000..72df98c93c91 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs +@@ -0,0 +1,87 @@ ++//! Vertical floating-point `sin_pi` ++#![allow(unused)] ++ ++// FIXME 64-bit 1 elem vectors sin_pi ++ ++use crate::*; ++ ++crate trait SinPi { ++ fn sin_pi(self) -> Self; ++} ++ ++gen_unary_impl_table!(SinPi, sin_pi); ++ ++macro_rules! impl_def { ++ ($vid:ident, $PI:path) => { ++ impl SinPi for $vid { ++ #[inline] ++ fn sin_pi(self) -> Self { ++ (self * Self::splat($PI)).sin() ++ } ++ } ++ }; ++} ++macro_rules! impl_def32 { ++ ($vid:ident) => { ++ impl_def!($vid, crate::f32::consts::PI); ++ }; ++} ++macro_rules! impl_def64 { ++ ($vid:ident) => { ++ impl_def!($vid, crate::f64::consts::PI); ++ }; ++} ++ ++cfg_if! { ++ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! 
{ ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx2); ++ ++ impl_unary!(f32x4: Sleef_sinpif4_u05avx2128); ++ impl_unary!(f32x8: Sleef_sinpif8_u05avx2); ++ impl_unary!(f64x2: Sleef_sinpid2_u05avx2128); ++ impl_unary!(f64x4: Sleef_sinpid4_u05avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx); ++ ++ impl_unary!(f32x4: Sleef_sinpif4_u05sse4); ++ impl_unary!(f32x8: Sleef_sinpif8_u05avx); ++ impl_unary!(f64x2: Sleef_sinpid2_u05sse4); ++ impl_unary!(f64x4: Sleef_sinpid4_u05avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_sinpif4_u05sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_sinpid2_u05sse4); ++ ++ impl_unary!(f32x4: Sleef_sinpif4_u05sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_sinpif4_u05sse4); ++ impl_unary!(f64x2: Sleef_sinpid2_u05sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_sinpid2_u05sse4); ++ } else { ++ impl_def32!(f32x2); ++ impl_def32!(f32x4); ++ impl_def32!(f32x8); ++ impl_def32!(f32x16); ++ ++ impl_def64!(f64x2); ++ impl_def64!(f64x4); ++ impl_def64!(f64x8); ++ } ++ } ++ } else { ++ impl_def32!(f32x2); ++ impl_def32!(f32x4); ++ impl_def32!(f32x8); ++ impl_def32!(f32x16); ++ ++ impl_def64!(f64x2); ++ impl_def64!(f64x4); ++ impl_def64!(f64x8); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs b/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs +new file mode 100644 +index 000000000000..7ce31df62662 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs +@@ -0,0 +1,103 @@ ++//! 
Vertical floating-point `sqrt` ++#![allow(unused)] ++ ++// FIXME 64-bit 1 elem vectors sqrt ++ ++use crate::*; ++ ++crate trait Sqrt { ++ fn sqrt(self) -> Self; ++} ++ ++#[allow(improper_ctypes)] ++extern "C" { ++ #[link_name = "llvm.sqrt.v2f32"] ++ fn sqrt_v2f32(x: f32x2) -> f32x2; ++ #[link_name = "llvm.sqrt.v4f32"] ++ fn sqrt_v4f32(x: f32x4) -> f32x4; ++ #[link_name = "llvm.sqrt.v8f32"] ++ fn sqrt_v8f32(x: f32x8) -> f32x8; ++ #[link_name = "llvm.sqrt.v16f32"] ++ fn sqrt_v16f32(x: f32x16) -> f32x16; ++ /* FIXME 64-bit sqrtgle elem vectors ++ #[link_name = "llvm.sqrt.v1f64"] ++ fn sqrt_v1f64(x: f64x1) -> f64x1; ++ */ ++ #[link_name = "llvm.sqrt.v2f64"] ++ fn sqrt_v2f64(x: f64x2) -> f64x2; ++ #[link_name = "llvm.sqrt.v4f64"] ++ fn sqrt_v4f64(x: f64x4) -> f64x4; ++ #[link_name = "llvm.sqrt.v8f64"] ++ fn sqrt_v8f64(x: f64x8) -> f64x8; ++ ++ #[link_name = "llvm.sqrt.f32"] ++ fn sqrt_f32(x: f32) -> f32; ++ #[link_name = "llvm.sqrt.f64"] ++ fn sqrt_f64(x: f64) -> f64; ++} ++ ++gen_unary_impl_table!(Sqrt, sqrt); ++ ++cfg_if! { ++ if #[cfg(target_arch = "s390x")] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 ++ impl_unary!(f32x2[f32; 2]: sqrt_f32); ++ impl_unary!(f32x4[f32; 4]: sqrt_f32); ++ impl_unary!(f32x8[f32; 8]: sqrt_f32); ++ impl_unary!(f32x16[f32; 16]: sqrt_f32); ++ ++ impl_unary!(f64x2[f64; 2]: sqrt_f64); ++ impl_unary!(f64x4[f64; 4]: sqrt_f64); ++ impl_unary!(f64x8[f64; 8]: sqrt_f64); ++ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! 
{ ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx2); ++ ++ impl_unary!(f32x4: Sleef_sqrtf4_avx2128); ++ impl_unary!(f32x8: Sleef_sqrtf8_avx2); ++ impl_unary!(f64x2: Sleef_sqrtd2_avx2128); ++ impl_unary!(f64x4: Sleef_sqrtd4_avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx); ++ ++ impl_unary!(f32x4: Sleef_sqrtf4_sse4); ++ impl_unary!(f32x8: Sleef_sqrtf8_avx); ++ impl_unary!(f64x2: Sleef_sqrtd2_sse4); ++ impl_unary!(f64x4: Sleef_sqrtd4_avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_sse4); ++ ++ impl_unary!(f32x4: Sleef_sqrtf4_sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_sse4); ++ impl_unary!(f64x2: Sleef_sqrtd2_sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_sse4); ++ } else { ++ impl_unary!(f32x2[f32; 2]: sqrt_f32); ++ impl_unary!(f32x16: sqrt_v16f32); ++ impl_unary!(f64x8: sqrt_v8f64); ++ ++ impl_unary!(f32x4: sqrt_v4f32); ++ impl_unary!(f32x8: sqrt_v8f32); ++ impl_unary!(f64x2: sqrt_v2f64); ++ impl_unary!(f64x4: sqrt_v4f64); ++ } ++ } ++ } else { ++ impl_unary!(f32x2[f32; 2]: sqrt_f32); ++ impl_unary!(f32x4: sqrt_v4f32); ++ impl_unary!(f32x8: sqrt_v8f32); ++ impl_unary!(f32x16: sqrt_v16f32); ++ ++ impl_unary!(f64x2: sqrt_v2f64); ++ impl_unary!(f64x4: sqrt_v4f64); ++ impl_unary!(f64x8: sqrt_v8f64); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs b/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs +new file mode 100644 +index 000000000000..c1e379c34241 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs 
+@@ -0,0 +1,67 @@ ++//! Vertical floating-point `sqrt` ++#![allow(unused)] ++ ++// FIXME 64-bit 1 elem vectors sqrte ++ ++use crate::llvm::simd_fsqrt; ++use crate::*; ++ ++crate trait Sqrte { ++ fn sqrte(self) -> Self; ++} ++ ++gen_unary_impl_table!(Sqrte, sqrte); ++ ++cfg_if! { ++ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! { ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx2); ++ ++ impl_unary!(f32x4: Sleef_sqrtf4_u35avx2128); ++ impl_unary!(f32x8: Sleef_sqrtf8_u35avx2); ++ impl_unary!(f64x2: Sleef_sqrtd2_u35avx2128); ++ impl_unary!(f64x4: Sleef_sqrtd4_u35avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx); ++ ++ impl_unary!(f32x4: Sleef_sqrtf4_u35sse4); ++ impl_unary!(f32x8: Sleef_sqrtf8_u35avx); ++ impl_unary!(f64x2: Sleef_sqrtd2_u35sse4); ++ impl_unary!(f64x4: Sleef_sqrtd4_u35avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_u35sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_u35sse4); ++ ++ impl_unary!(f32x4: Sleef_sqrtf4_u35sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_u35sse4); ++ impl_unary!(f64x2: Sleef_sqrtd2_u35sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_u35sse4); ++ } else { ++ impl_unary!(f32x2[g]: simd_fsqrt); ++ impl_unary!(f32x16[g]: simd_fsqrt); ++ impl_unary!(f64x8[g]: simd_fsqrt); ++ ++ impl_unary!(f32x4[g]: simd_fsqrt); ++ impl_unary!(f32x8[g]: simd_fsqrt); ++ impl_unary!(f64x2[g]: simd_fsqrt); ++ impl_unary!(f64x4[g]: simd_fsqrt); ++ } ++ } ++ } else { ++ impl_unary!(f32x2[g]: simd_fsqrt); ++ impl_unary!(f32x4[g]: simd_fsqrt); ++ 
impl_unary!(f32x8[g]: simd_fsqrt); ++ impl_unary!(f32x16[g]: simd_fsqrt); ++ ++ impl_unary!(f64x2[g]: simd_fsqrt); ++ impl_unary!(f64x4[g]: simd_fsqrt); ++ impl_unary!(f64x8[g]: simd_fsqrt); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs b/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs +new file mode 100644 +index 000000000000..39f493d3b17f +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs +@@ -0,0 +1,28 @@ ++//! Provides `isize` and `usize` ++ ++use cfg_if::cfg_if; ++ ++cfg_if! { ++ if #[cfg(target_pointer_width = "8")] { ++ crate type isize_ = i8; ++ crate type usize_ = u8; ++ } else if #[cfg(target_pointer_width = "16")] { ++ crate type isize_ = i16; ++ crate type usize_ = u16; ++ } else if #[cfg(target_pointer_width = "32")] { ++ crate type isize_ = i32; ++ crate type usize_ = u32; ++ ++ } else if #[cfg(target_pointer_width = "64")] { ++ crate type isize_ = i64; ++ crate type usize_ = u64; ++ } else if #[cfg(target_pointer_width = "64")] { ++ crate type isize_ = i64; ++ crate type usize_ = u64; ++ } else if #[cfg(target_pointer_width = "128")] { ++ crate type isize_ = i128; ++ crate type usize_ = u128; ++ } else { ++ compile_error!("unsupported target_pointer_width"); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/reductions.rs b/third_party/rust/packed_simd/src/codegen/reductions.rs +new file mode 100644 +index 000000000000..7be4f5fabbea +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/reductions.rs +@@ -0,0 +1 @@ ++crate mod mask; +diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs +new file mode 100644 +index 000000000000..97260c6d4e03 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs +@@ -0,0 +1,69 @@ ++//! Code generation workaround for `all()` mask horizontal reduction. ++//! ++//! Works arround [LLVM bug 36702]. ++//! ++//! 
[LLVM bug 36702]: https://bugs.llvm.org/show_bug.cgi?id=36702 ++#![allow(unused_macros)] ++ ++use crate::*; ++ ++crate trait All: crate::marker::Sized { ++ unsafe fn all(self) -> bool; ++} ++ ++crate trait Any: crate::marker::Sized { ++ unsafe fn any(self) -> bool; ++} ++ ++#[macro_use] ++mod fallback_impl; ++ ++cfg_if! { ++ if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { ++ #[macro_use] ++ mod x86; ++ } else if #[cfg(all(target_arch = "arm", target_feature = "v7", ++ target_feature = "neon", ++ any(feature = "core_arch", libcore_neon)))] { ++ #[macro_use] ++ mod arm; ++ } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { ++ #[macro_use] ++ mod aarch64; ++ } else { ++ #[macro_use] ++ mod fallback; ++ } ++} ++ ++impl_mask_reductions!(m8x2); ++impl_mask_reductions!(m8x4); ++impl_mask_reductions!(m8x8); ++impl_mask_reductions!(m8x16); ++impl_mask_reductions!(m8x32); ++impl_mask_reductions!(m8x64); ++ ++impl_mask_reductions!(m16x2); ++impl_mask_reductions!(m16x4); ++impl_mask_reductions!(m16x8); ++impl_mask_reductions!(m16x16); ++impl_mask_reductions!(m16x32); ++ ++impl_mask_reductions!(m32x2); ++impl_mask_reductions!(m32x4); ++impl_mask_reductions!(m32x8); ++impl_mask_reductions!(m32x16); ++ ++// FIXME: 64-bit single element vector ++// impl_mask_reductions!(m64x1); ++impl_mask_reductions!(m64x2); ++impl_mask_reductions!(m64x4); ++impl_mask_reductions!(m64x8); ++ ++impl_mask_reductions!(m128x1); ++impl_mask_reductions!(m128x2); ++impl_mask_reductions!(m128x4); ++ ++impl_mask_reductions!(msizex2); ++impl_mask_reductions!(msizex4); ++impl_mask_reductions!(msizex8); +diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs +new file mode 100644 +index 000000000000..e9586eace1ff +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs +@@ -0,0 +1,71 @@ ++//! 
Mask reductions implementation for `aarch64` targets ++ ++/// 128-bit wide vectors ++macro_rules! aarch64_128_neon_impl { ++ ($id:ident, $vmin:ident, $vmax:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "neon")] ++ unsafe fn all(self) -> bool { ++ use crate::arch::aarch64::$vmin; ++ $vmin(crate::mem::transmute(self)) != 0 ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "neon")] ++ unsafe fn any(self) -> bool { ++ use crate::arch::aarch64::$vmax; ++ $vmax(crate::mem::transmute(self)) != 0 ++ } ++ } ++ } ++} ++ ++/// 64-bit wide vectors ++macro_rules! aarch64_64_neon_impl { ++ ($id:ident, $vec128:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "neon")] ++ unsafe fn all(self) -> bool { ++ // Duplicates the 64-bit vector into a 128-bit one and ++ // calls all on that. ++ union U { ++ halves: ($id, $id), ++ vec: $vec128, ++ } ++ U { ++ halves: (self, self), ++ }.vec.all() ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "neon")] ++ unsafe fn any(self) -> bool { ++ union U { ++ halves: ($id, $id), ++ vec: $vec128, ++ } ++ U { ++ halves: (self, self), ++ }.vec.any() ++ } ++ } ++ }; ++} ++ ++/// Mask reduction implementation for `aarch64` targets ++macro_rules! 
impl_mask_reductions { ++ // 64-bit wide masks ++ (m8x8) => { aarch64_64_neon_impl!(m8x8, m8x16); }; ++ (m16x4) => { aarch64_64_neon_impl!(m16x4, m16x8); }; ++ (m32x2) => { aarch64_64_neon_impl!(m32x2, m32x4); }; ++ // 128-bit wide masks ++ (m8x16) => { aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8); }; ++ (m16x8) => { aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16); }; ++ (m32x4) => { aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32); }; ++ // Fallback to LLVM's default code-generation: ++ ($id:ident) => { fallback_impl!($id); }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs +new file mode 100644 +index 000000000000..1987af7a9676 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs +@@ -0,0 +1,54 @@ ++//! Mask reductions implementation for `arm` targets ++ ++/// Implementation for ARM + v7 + NEON for 64-bit or 128-bit wide vectors with ++/// more than two elements. ++macro_rules! arm_128_v7_neon_impl { ++ ($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "v7,neon")] ++ unsafe fn all(self) -> bool { ++ use crate::arch::arm::$vpmin; ++ use crate::mem::transmute; ++ union U { ++ halves: ($half, $half), ++ vec: $id, ++ } ++ let halves = U { vec: self }.halves; ++ let h: $half = transmute($vpmin( ++ transmute(halves.0), ++ transmute(halves.1), ++ )); ++ h.all() ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "v7,neon")] ++ unsafe fn any(self) -> bool { ++ use crate::arch::arm::$vpmax; ++ use crate::mem::transmute; ++ union U { ++ halves: ($half, $half), ++ vec: $id, ++ } ++ let halves = U { vec: self }.halves; ++ let h: $half = transmute($vpmax( ++ transmute(halves.0), ++ transmute(halves.1), ++ )); ++ h.any() ++ } ++ } ++ }; ++} ++ ++/// Mask reduction implementation for `arm` targets ++macro_rules! 
impl_mask_reductions { ++ // 128-bit wide masks ++ (m8x16) => { arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); }; ++ (m16x8) => { arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); }; ++ (m32x4) => { arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32); }; ++ // Fallback to LLVM's default code-generation: ++ ($id:ident) => { fallback_impl!($id); }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs +new file mode 100644 +index 000000000000..25e5c813abca +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs +@@ -0,0 +1,6 @@ ++//! Default mask reduction implementations. ++ ++/// Default mask reduction implementation ++macro_rules! impl_mask_reductions { ++ ($id:ident) => { fallback_impl!($id); }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs +new file mode 100644 +index 000000000000..0d246e2fdab6 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs +@@ -0,0 +1,237 @@ ++//! Default implementation of a mask reduction for any target. ++ ++macro_rules! fallback_to_other_impl { ++ ($id:ident, $other:ident) => { ++ impl All for $id { ++ #[inline] ++ unsafe fn all(self) -> bool { ++ let m: $other = crate::mem::transmute(self); ++ m.all() ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ unsafe fn any(self) -> bool { ++ let m: $other = crate::mem::transmute(self); ++ m.any() ++ } ++ } ++ }; ++} ++ ++/// Fallback implementation. ++macro_rules! 
fallback_impl { ++ // 16-bit wide masks: ++ (m8x2) => { ++ impl All for m8x2 { ++ #[inline] ++ unsafe fn all(self) -> bool { ++ let i: u16 = crate::mem::transmute(self); ++ i == u16::max_value() ++ } ++ } ++ impl Any for m8x2 { ++ #[inline] ++ unsafe fn any(self) -> bool { ++ let i: u16 = crate::mem::transmute(self); ++ i != 0 ++ } ++ } ++ }; ++ // 32-bit wide masks ++ (m8x4) => { ++ impl All for m8x4 { ++ #[inline] ++ unsafe fn all(self) -> bool { ++ let i: u32 = crate::mem::transmute(self); ++ i == u32::max_value() ++ } ++ } ++ impl Any for m8x4 { ++ #[inline] ++ unsafe fn any(self) -> bool { ++ let i: u32 = crate::mem::transmute(self); ++ i != 0 ++ } ++ } ++ }; ++ (m16x2) => { ++ fallback_to_other_impl!(m16x2, m8x4); ++ }; ++ // 64-bit wide masks: ++ (m8x8) => { ++ impl All for m8x8 { ++ #[inline] ++ unsafe fn all(self) -> bool { ++ let i: u64 = crate::mem::transmute(self); ++ i == u64::max_value() ++ } ++ } ++ impl Any for m8x8 { ++ #[inline] ++ unsafe fn any(self) -> bool { ++ let i: u64 = crate::mem::transmute(self); ++ i != 0 ++ } ++ } ++ }; ++ (m16x4) => { ++ fallback_to_other_impl!(m16x4, m8x8); ++ }; ++ (m32x2) => { ++ fallback_to_other_impl!(m32x2, m16x4); ++ }; ++ // FIXME: 64x1 maxk ++ // 128-bit wide masks: ++ (m8x16) => { ++ impl All for m8x16 { ++ #[inline] ++ unsafe fn all(self) -> bool { ++ let i: u128 = crate::mem::transmute(self); ++ i == u128::max_value() ++ } ++ } ++ impl Any for m8x16 { ++ #[inline] ++ unsafe fn any(self) -> bool { ++ let i: u128 = crate::mem::transmute(self); ++ i != 0 ++ } ++ } ++ }; ++ (m16x8) => { ++ fallback_to_other_impl!(m16x8, m8x16); ++ }; ++ (m32x4) => { ++ fallback_to_other_impl!(m32x4, m16x8); ++ }; ++ (m64x2) => { ++ fallback_to_other_impl!(m64x2, m32x4); ++ }; ++ (m128x1) => { ++ fallback_to_other_impl!(m128x1, m64x2); ++ }; ++ // 256-bit wide masks ++ (m8x32) => { ++ impl All for m8x32 { ++ #[inline] ++ unsafe fn all(self) -> bool { ++ let i: [u128; 2] = crate::mem::transmute(self); ++ let o: [u128; 2] = 
[u128::max_value(); 2]; ++ i == o ++ } ++ } ++ impl Any for m8x32 { ++ #[inline] ++ unsafe fn any(self) -> bool { ++ let i: [u128; 2] = crate::mem::transmute(self); ++ let o: [u128; 2] = [0; 2]; ++ i != o ++ } ++ } ++ }; ++ (m16x16) => { ++ fallback_to_other_impl!(m16x16, m8x32); ++ }; ++ (m32x8) => { ++ fallback_to_other_impl!(m32x8, m16x16); ++ }; ++ (m64x4) => { ++ fallback_to_other_impl!(m64x4, m32x8); ++ }; ++ (m128x2) => { ++ fallback_to_other_impl!(m128x2, m64x4); ++ }; ++ // 512-bit wide masks ++ (m8x64) => { ++ impl All for m8x64 { ++ #[inline] ++ unsafe fn all(self) -> bool { ++ let i: [u128; 4] = crate::mem::transmute(self); ++ let o: [u128; 4] = [u128::max_value(); 4]; ++ i == o ++ } ++ } ++ impl Any for m8x64 { ++ #[inline] ++ unsafe fn any(self) -> bool { ++ let i: [u128; 4] = crate::mem::transmute(self); ++ let o: [u128; 4] = [0; 4]; ++ i != o ++ } ++ } ++ }; ++ (m16x32) => { ++ fallback_to_other_impl!(m16x32, m8x64); ++ }; ++ (m32x16) => { ++ fallback_to_other_impl!(m32x16, m16x32); ++ }; ++ (m64x8) => { ++ fallback_to_other_impl!(m64x8, m32x16); ++ }; ++ (m128x4) => { ++ fallback_to_other_impl!(m128x4, m64x8); ++ }; ++ // Masks with pointer-sized elements64 ++ (msizex2) => { ++ cfg_if! { ++ if #[cfg(target_pointer_width = "64")] { ++ fallback_to_other_impl!(msizex2, m64x2); ++ } else if #[cfg(target_pointer_width = "32")] { ++ fallback_to_other_impl!(msizex2, m32x2); ++ } else { ++ compile_error!("unsupported target_pointer_width"); ++ } ++ } ++ }; ++ (msizex4) => { ++ cfg_if! { ++ if #[cfg(target_pointer_width = "64")] { ++ fallback_to_other_impl!(msizex4, m64x4); ++ } else if #[cfg(target_pointer_width = "32")] { ++ fallback_to_other_impl!(msizex4, m32x4); ++ } else { ++ compile_error!("unsupported target_pointer_width"); ++ } ++ } ++ }; ++ (msizex8) => { ++ cfg_if! 
{ ++ if #[cfg(target_pointer_width = "64")] { ++ fallback_to_other_impl!(msizex8, m64x8); ++ } else if #[cfg(target_pointer_width = "32")] { ++ fallback_to_other_impl!(msizex8, m32x8); ++ } else { ++ compile_error!("unsupported target_pointer_width"); ++ } ++ } ++ }; ++} ++ ++macro_rules! recurse_half { ++ ($vid:ident, $vid_h:ident) => { ++ impl All for $vid { ++ #[inline] ++ unsafe fn all(self) -> bool { ++ union U { ++ halves: ($vid_h, $vid_h), ++ vec: $vid, ++ } ++ let halves = U { vec: self }.halves; ++ halves.0.all() && halves.1.all() ++ } ++ } ++ impl Any for $vid { ++ #[inline] ++ unsafe fn any(self) -> bool { ++ union U { ++ halves: ($vid_h, $vid_h), ++ vec: $vid, ++ } ++ let halves = U { vec: self }.halves; ++ halves.0.any() || halves.1.any() ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs +new file mode 100644 +index 000000000000..2ae4ed81c416 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs +@@ -0,0 +1,194 @@ ++//! Mask reductions implementation for `x86` and `x86_64` targets ++ ++#[cfg(target_feature = "sse")] ++#[macro_use] ++mod sse; ++ ++#[cfg(target_feature = "sse2")] ++#[macro_use] ++mod sse2; ++ ++#[cfg(target_feature = "avx")] ++#[macro_use] ++mod avx; ++ ++#[cfg(target_feature = "avx2")] ++#[macro_use] ++mod avx2; ++ ++/// x86 64-bit m8x8 implementation ++macro_rules! x86_m8x8_impl { ++ ($id:ident) => { ++ cfg_if! { ++ if #[cfg(all(target_arch = "x86_64", target_feature = "sse"))] { ++ x86_m8x8_sse_impl!($id); ++ } else { ++ fallback_impl!($id); ++ } ++ } ++ }; ++} ++ ++/// x86 128-bit m8x16 implementation ++macro_rules! x86_m8x16_impl { ++ ($id:ident) => { ++ cfg_if! { ++ if #[cfg(target_feature = "sse2")] { ++ x86_m8x16_sse2_impl!($id); ++ } else { ++ fallback_impl!($id); ++ } ++ } ++ }; ++} ++ ++/// x86 128-bit m32x4 implementation ++macro_rules! 
x86_m32x4_impl { ++ ($id:ident) => { ++ cfg_if! { ++ if #[cfg(target_feature = "sse")] { ++ x86_m32x4_sse_impl!($id); ++ } else { ++ fallback_impl!($id); ++ } ++ } ++ }; ++} ++ ++/// x86 128-bit m64x2 implementation ++macro_rules! x86_m64x2_impl { ++ ($id:ident) => { ++ cfg_if! { ++ if #[cfg(target_feature = "sse2")] { ++ x86_m64x2_sse2_impl!($id); ++ } else if #[cfg(target_feature = "sse")] { ++ x86_m32x4_sse_impl!($id); ++ } else { ++ fallback_impl!($id); ++ } ++ } ++ }; ++} ++ ++/// x86 256-bit m8x32 implementation ++macro_rules! x86_m8x32_impl { ++ ($id:ident, $half_id:ident) => { ++ cfg_if! { ++ if #[cfg(target_feature = "avx2")] { ++ x86_m8x32_avx2_impl!($id); ++ } else if #[cfg(target_feature = "avx")] { ++ x86_m8x32_avx_impl!($id); ++ } else if #[cfg(target_feature = "sse2")] { ++ recurse_half!($id, $half_id); ++ } else { ++ fallback_impl!($id); ++ } ++ } ++ }; ++} ++ ++/// x86 256-bit m32x8 implementation ++macro_rules! x86_m32x8_impl { ++ ($id:ident, $half_id:ident) => { ++ cfg_if! { ++ if #[cfg(target_feature = "avx")] { ++ x86_m32x8_avx_impl!($id); ++ } else if #[cfg(target_feature = "sse")] { ++ recurse_half!($id, $half_id); ++ } else { ++ fallback_impl!($id); ++ } ++ } ++ }; ++} ++ ++/// x86 256-bit m64x4 implementation ++macro_rules! x86_m64x4_impl { ++ ($id:ident, $half_id:ident) => { ++ cfg_if! { ++ if #[cfg(target_feature = "avx")] { ++ x86_m64x4_avx_impl!($id); ++ } else if #[cfg(target_feature = "sse")] { ++ recurse_half!($id, $half_id); ++ } else { ++ fallback_impl!($id); ++ } ++ } ++ }; ++} ++ ++/// Fallback implementation. ++macro_rules! 
x86_intr_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ unsafe fn all(self) -> bool { ++ use crate::llvm::simd_reduce_all; ++ simd_reduce_all(self.0) ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ unsafe fn any(self) -> bool { ++ use crate::llvm::simd_reduce_any; ++ simd_reduce_any(self.0) ++ } ++ } ++ }; ++} ++ ++/// Mask reduction implementation for `x86` and `x86_64` targets ++macro_rules! impl_mask_reductions { ++ // 64-bit wide masks ++ (m8x8) => { x86_m8x8_impl!(m8x8); }; ++ (m16x4) => { x86_m8x8_impl!(m16x4); }; ++ (m32x2) => { x86_m8x8_impl!(m32x2); }; ++ // 128-bit wide masks ++ (m8x16) => { x86_m8x16_impl!(m8x16); }; ++ (m16x8) => { x86_m8x16_impl!(m16x8); }; ++ (m32x4) => { x86_m32x4_impl!(m32x4); }; ++ (m64x2) => { x86_m64x2_impl!(m64x2); }; ++ (m128x1) => { x86_intr_impl!(m128x1); }; ++ // 256-bit wide masks: ++ (m8x32) => { x86_m8x32_impl!(m8x32, m8x16); }; ++ (m16x16) => { x86_m8x32_impl!(m16x16, m16x8); }; ++ (m32x8) => { x86_m32x8_impl!(m32x8, m32x4); }; ++ (m64x4) => { x86_m64x4_impl!(m64x4, m64x2); }; ++ (m128x2) => { x86_intr_impl!(m128x2); }; ++ (msizex2) => { ++ cfg_if! { ++ if #[cfg(target_pointer_width = "64")] { ++ fallback_to_other_impl!(msizex2, m64x2); ++ } else if #[cfg(target_pointer_width = "32")] { ++ fallback_to_other_impl!(msizex2, m32x2); ++ } else { ++ compile_error!("unsupported target_pointer_width"); ++ } ++ } ++ }; ++ (msizex4) => { ++ cfg_if! { ++ if #[cfg(target_pointer_width = "64")] { ++ fallback_to_other_impl!(msizex4, m64x4); ++ } else if #[cfg(target_pointer_width = "32")] { ++ fallback_to_other_impl!(msizex4, m32x4); ++ } else { ++ compile_error!("unsupported target_pointer_width"); ++ } ++ } ++ }; ++ (msizex8) => { ++ cfg_if! 
{ ++ if #[cfg(target_pointer_width = "64")] { ++ fallback_to_other_impl!(msizex8, m64x8); ++ } else if #[cfg(target_pointer_width = "32")] { ++ fallback_to_other_impl!(msizex8, m32x8); ++ } else { ++ compile_error!("unsupported target_pointer_width"); ++ } ++ } ++ }; ++ ++ // Fallback to LLVM's default code-generation: ++ ($id:ident) => { fallback_impl!($id); }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs +new file mode 100644 +index 000000000000..d18736fb0399 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs +@@ -0,0 +1,101 @@ ++//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX` ++ ++/// `x86`/`x86_64` 256-bit `AVX` implementation ++/// FIXME: it might be faster here to do two `_mm_movmask_epi8` ++#[cfg(target_feature = "avx")] ++macro_rules! x86_m8x32_avx_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "avx")] ++ unsafe fn all(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm256_testc_si256; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm256_testc_si256; ++ _mm256_testc_si256( ++ crate::mem::transmute(self), ++ crate::mem::transmute($id::splat(true)), ++ ) != 0 ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "avx")] ++ unsafe fn any(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm256_testz_si256; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm256_testz_si256; ++ _mm256_testz_si256( ++ crate::mem::transmute(self), ++ crate::mem::transmute(self), ++ ) == 0 ++ } ++ } ++ }; ++} ++ ++/// `x86`/`x86_64` 256-bit m32x8 `AVX` implementation ++macro_rules! 
x86_m32x8_avx_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn all(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm256_movemask_ps; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm256_movemask_ps; ++ // _mm256_movemask_ps(a) creates a 8bit mask containing the ++ // most significant bit of each lane of `a`. If all bits are ++ // set, then all 8 lanes of the mask are true. ++ _mm256_movemask_ps(crate::mem::transmute(self)) == 0b_1111_1111_i32 ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn any(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm256_movemask_ps; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm256_movemask_ps; ++ ++ _mm256_movemask_ps(crate::mem::transmute(self)) != 0 ++ } ++ } ++ }; ++} ++ ++/// `x86`/`x86_64` 256-bit m64x4 `AVX` implementation ++macro_rules! x86_m64x4_avx_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn all(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm256_movemask_pd; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm256_movemask_pd; ++ // _mm256_movemask_pd(a) creates a 4bit mask containing the ++ // most significant bit of each lane of `a`. If all bits are ++ // set, then all 4 lanes of the mask are true. 
++ _mm256_movemask_pd(crate::mem::transmute(self)) == 0b_1111_i32 ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn any(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm256_movemask_pd; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm256_movemask_pd; ++ ++ _mm256_movemask_pd(crate::mem::transmute(self)) != 0 ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs +new file mode 100644 +index 000000000000..d37d02342092 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs +@@ -0,0 +1,35 @@ ++//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX2`. ++#![allow(unused)] ++ ++/// x86/x86_64 256-bit m8x32 AVX2 implementation ++macro_rules! x86_m8x32_avx2_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "sse2")] ++ unsafe fn all(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm256_movemask_epi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm256_movemask_epi8; ++ // _mm256_movemask_epi8(a) creates a 32bit mask containing the ++ // most significant bit of each byte of `a`. If all ++ // bits are set, then all 32 lanes of the mask are ++ // true. 
++ _mm256_movemask_epi8(crate::mem::transmute(self)) == -1_i32 ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "sse2")] ++ unsafe fn any(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm256_movemask_epi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm256_movemask_epi8; ++ ++ _mm256_movemask_epi8(crate::mem::transmute(self)) != 0 ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs +new file mode 100644 +index 000000000000..7482f9430a14 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs +@@ -0,0 +1,68 @@ ++//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE`. ++#![allow(unused)] ++ ++/// `x86`/`x86_64` 128-bit `m32x4` `SSE` implementation ++macro_rules! x86_m32x4_sse_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn all(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_ps; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_ps; ++ // _mm_movemask_ps(a) creates a 4bit mask containing the ++ // most significant bit of each lane of `a`. If all ++ // bits are set, then all 4 lanes of the mask are ++ // true. ++ _mm_movemask_ps(crate::mem::transmute(self)) ++ == 0b_1111_i32 ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn any(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_ps; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_ps; ++ ++ _mm_movemask_ps(crate::mem::transmute(self)) != 0 ++ } ++ } ++ }; ++} ++ ++macro_rules! 
x86_m8x8_sse_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn all(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_pi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_pi8; ++ // _mm_movemask_pi8(a) creates an 8bit mask containing the most ++ // significant bit of each byte of `a`. If all bits are set, ++ // then all 8 lanes of the mask are true. ++ _mm_movemask_pi8(crate::mem::transmute(self)) ++ == u8::max_value() as i32 ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn any(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_pi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_pi8; ++ ++ _mm_movemask_pi8(crate::mem::transmute(self)) != 0 ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs +new file mode 100644 +index 000000000000..a99c606f5268 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs +@@ -0,0 +1,70 @@ ++//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE2`. ++#![allow(unused)] ++ ++/// `x86`/`x86_64` 128-bit m64x2 `SSE2` implementation ++macro_rules! x86_m64x2_sse2_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn all(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_pd; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_pd; ++ // _mm_movemask_pd(a) creates a 2bit mask containing the ++ // most significant bit of each lane of `a`. If all ++ // bits are set, then all 2 lanes of the mask are ++ // true. 
++ _mm_movemask_pd(crate::mem::transmute(self)) ++ == 0b_11_i32 ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn any(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_pd; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_pd; ++ ++ _mm_movemask_pd(crate::mem::transmute(self)) != 0 ++ } ++ } ++ }; ++} ++ ++/// `x86`/`x86_64` 128-bit m8x16 `SSE2` implementation ++macro_rules! x86_m8x16_sse2_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "sse2")] ++ unsafe fn all(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_epi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_epi8; ++ // _mm_movemask_epi8(a) creates a 16bit mask containing the ++ // most significant bit of each byte of `a`. If all ++ // bits are set, then all 16 lanes of the mask are ++ // true. ++ _mm_movemask_epi8(crate::mem::transmute(self)) ++ == i32::from(u16::max_value()) ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "sse2")] ++ unsafe fn any(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_epi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_epi8; ++ ++ _mm_movemask_epi8(crate::mem::transmute(self)) != 0 ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/shuffle.rs b/third_party/rust/packed_simd/src/codegen/shuffle.rs +new file mode 100644 +index 000000000000..35a9db905339 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/shuffle.rs +@@ -0,0 +1,302 @@ ++//! Implementations of the `ShuffleResult` trait for the different numbers of ++//! lanes and vector element types. 
++ ++use crate::masks::*; ++use crate::sealed::Shuffle; ++ ++impl Shuffle<[u32; 2]> for i8 { ++ type Output = crate::codegen::i8x2; ++} ++impl Shuffle<[u32; 4]> for i8 { ++ type Output = crate::codegen::i8x4; ++} ++impl Shuffle<[u32; 8]> for i8 { ++ type Output = crate::codegen::i8x8; ++} ++impl Shuffle<[u32; 16]> for i8 { ++ type Output = crate::codegen::i8x16; ++} ++impl Shuffle<[u32; 32]> for i8 { ++ type Output = crate::codegen::i8x32; ++} ++impl Shuffle<[u32; 64]> for i8 { ++ type Output = crate::codegen::i8x64; ++} ++ ++impl Shuffle<[u32; 2]> for u8 { ++ type Output = crate::codegen::u8x2; ++} ++impl Shuffle<[u32; 4]> for u8 { ++ type Output = crate::codegen::u8x4; ++} ++impl Shuffle<[u32; 8]> for u8 { ++ type Output = crate::codegen::u8x8; ++} ++impl Shuffle<[u32; 16]> for u8 { ++ type Output = crate::codegen::u8x16; ++} ++impl Shuffle<[u32; 32]> for u8 { ++ type Output = crate::codegen::u8x32; ++} ++impl Shuffle<[u32; 64]> for u8 { ++ type Output = crate::codegen::u8x64; ++} ++ ++impl Shuffle<[u32; 2]> for m8 { ++ type Output = crate::codegen::m8x2; ++} ++impl Shuffle<[u32; 4]> for m8 { ++ type Output = crate::codegen::m8x4; ++} ++impl Shuffle<[u32; 8]> for m8 { ++ type Output = crate::codegen::m8x8; ++} ++impl Shuffle<[u32; 16]> for m8 { ++ type Output = crate::codegen::m8x16; ++} ++impl Shuffle<[u32; 32]> for m8 { ++ type Output = crate::codegen::m8x32; ++} ++impl Shuffle<[u32; 64]> for m8 { ++ type Output = crate::codegen::m8x64; ++} ++ ++impl Shuffle<[u32; 2]> for i16 { ++ type Output = crate::codegen::i16x2; ++} ++impl Shuffle<[u32; 4]> for i16 { ++ type Output = crate::codegen::i16x4; ++} ++impl Shuffle<[u32; 8]> for i16 { ++ type Output = crate::codegen::i16x8; ++} ++impl Shuffle<[u32; 16]> for i16 { ++ type Output = crate::codegen::i16x16; ++} ++impl Shuffle<[u32; 32]> for i16 { ++ type Output = crate::codegen::i16x32; ++} ++ ++impl Shuffle<[u32; 2]> for u16 { ++ type Output = crate::codegen::u16x2; ++} ++impl Shuffle<[u32; 4]> for u16 { ++ type 
Output = crate::codegen::u16x4; ++} ++impl Shuffle<[u32; 8]> for u16 { ++ type Output = crate::codegen::u16x8; ++} ++impl Shuffle<[u32; 16]> for u16 { ++ type Output = crate::codegen::u16x16; ++} ++impl Shuffle<[u32; 32]> for u16 { ++ type Output = crate::codegen::u16x32; ++} ++ ++impl Shuffle<[u32; 2]> for m16 { ++ type Output = crate::codegen::m16x2; ++} ++impl Shuffle<[u32; 4]> for m16 { ++ type Output = crate::codegen::m16x4; ++} ++impl Shuffle<[u32; 8]> for m16 { ++ type Output = crate::codegen::m16x8; ++} ++impl Shuffle<[u32; 16]> for m16 { ++ type Output = crate::codegen::m16x16; ++} ++impl Shuffle<[u32; 32]> for m16 { ++ type Output = crate::codegen::m16x32; ++} ++ ++impl Shuffle<[u32; 2]> for i32 { ++ type Output = crate::codegen::i32x2; ++} ++impl Shuffle<[u32; 4]> for i32 { ++ type Output = crate::codegen::i32x4; ++} ++impl Shuffle<[u32; 8]> for i32 { ++ type Output = crate::codegen::i32x8; ++} ++impl Shuffle<[u32; 16]> for i32 { ++ type Output = crate::codegen::i32x16; ++} ++ ++impl Shuffle<[u32; 2]> for u32 { ++ type Output = crate::codegen::u32x2; ++} ++impl Shuffle<[u32; 4]> for u32 { ++ type Output = crate::codegen::u32x4; ++} ++impl Shuffle<[u32; 8]> for u32 { ++ type Output = crate::codegen::u32x8; ++} ++impl Shuffle<[u32; 16]> for u32 { ++ type Output = crate::codegen::u32x16; ++} ++ ++impl Shuffle<[u32; 2]> for f32 { ++ type Output = crate::codegen::f32x2; ++} ++impl Shuffle<[u32; 4]> for f32 { ++ type Output = crate::codegen::f32x4; ++} ++impl Shuffle<[u32; 8]> for f32 { ++ type Output = crate::codegen::f32x8; ++} ++impl Shuffle<[u32; 16]> for f32 { ++ type Output = crate::codegen::f32x16; ++} ++ ++impl Shuffle<[u32; 2]> for m32 { ++ type Output = crate::codegen::m32x2; ++} ++impl Shuffle<[u32; 4]> for m32 { ++ type Output = crate::codegen::m32x4; ++} ++impl Shuffle<[u32; 8]> for m32 { ++ type Output = crate::codegen::m32x8; ++} ++impl Shuffle<[u32; 16]> for m32 { ++ type Output = crate::codegen::m32x16; ++} ++ ++/* FIXME: 64-bit single element 
vector ++impl Shuffle<[u32; 1]> for i64 { ++ type Output = crate::codegen::i64x1; ++} ++*/ ++impl Shuffle<[u32; 2]> for i64 { ++ type Output = crate::codegen::i64x2; ++} ++impl Shuffle<[u32; 4]> for i64 { ++ type Output = crate::codegen::i64x4; ++} ++impl Shuffle<[u32; 8]> for i64 { ++ type Output = crate::codegen::i64x8; ++} ++ ++/* FIXME: 64-bit single element vector ++impl Shuffle<[u32; 1]> for u64 { ++ type Output = crate::codegen::u64x1; ++} ++*/ ++impl Shuffle<[u32; 2]> for u64 { ++ type Output = crate::codegen::u64x2; ++} ++impl Shuffle<[u32; 4]> for u64 { ++ type Output = crate::codegen::u64x4; ++} ++impl Shuffle<[u32; 8]> for u64 { ++ type Output = crate::codegen::u64x8; ++} ++ ++/* FIXME: 64-bit single element vector ++impl Shuffle<[u32; 1]> for f64 { ++ type Output = crate::codegen::f64x1; ++} ++*/ ++impl Shuffle<[u32; 2]> for f64 { ++ type Output = crate::codegen::f64x2; ++} ++impl Shuffle<[u32; 4]> for f64 { ++ type Output = crate::codegen::f64x4; ++} ++impl Shuffle<[u32; 8]> for f64 { ++ type Output = crate::codegen::f64x8; ++} ++ ++/* FIXME: 64-bit single element vector ++impl Shuffle<[u32; 1]> for m64 { ++ type Output = crate::codegen::m64x1; ++} ++*/ ++impl Shuffle<[u32; 2]> for m64 { ++ type Output = crate::codegen::m64x2; ++} ++impl Shuffle<[u32; 4]> for m64 { ++ type Output = crate::codegen::m64x4; ++} ++impl Shuffle<[u32; 8]> for m64 { ++ type Output = crate::codegen::m64x8; ++} ++ ++impl Shuffle<[u32; 2]> for isize { ++ type Output = crate::codegen::isizex2; ++} ++impl Shuffle<[u32; 4]> for isize { ++ type Output = crate::codegen::isizex4; ++} ++impl Shuffle<[u32; 8]> for isize { ++ type Output = crate::codegen::isizex8; ++} ++ ++impl Shuffle<[u32; 2]> for usize { ++ type Output = crate::codegen::usizex2; ++} ++impl Shuffle<[u32; 4]> for usize { ++ type Output = crate::codegen::usizex4; ++} ++impl Shuffle<[u32; 8]> for usize { ++ type Output = crate::codegen::usizex8; ++} ++ ++impl Shuffle<[u32; 2]> for *const T { ++ type Output = 
crate::codegen::cptrx2; ++} ++impl Shuffle<[u32; 4]> for *const T { ++ type Output = crate::codegen::cptrx4; ++} ++impl Shuffle<[u32; 8]> for *const T { ++ type Output = crate::codegen::cptrx8; ++} ++ ++impl Shuffle<[u32; 2]> for *mut T { ++ type Output = crate::codegen::mptrx2; ++} ++impl Shuffle<[u32; 4]> for *mut T { ++ type Output = crate::codegen::mptrx4; ++} ++impl Shuffle<[u32; 8]> for *mut T { ++ type Output = crate::codegen::mptrx8; ++} ++ ++impl Shuffle<[u32; 2]> for msize { ++ type Output = crate::codegen::msizex2; ++} ++impl Shuffle<[u32; 4]> for msize { ++ type Output = crate::codegen::msizex4; ++} ++impl Shuffle<[u32; 8]> for msize { ++ type Output = crate::codegen::msizex8; ++} ++ ++impl Shuffle<[u32; 1]> for i128 { ++ type Output = crate::codegen::i128x1; ++} ++impl Shuffle<[u32; 2]> for i128 { ++ type Output = crate::codegen::i128x2; ++} ++impl Shuffle<[u32; 4]> for i128 { ++ type Output = crate::codegen::i128x4; ++} ++ ++impl Shuffle<[u32; 1]> for u128 { ++ type Output = crate::codegen::u128x1; ++} ++impl Shuffle<[u32; 2]> for u128 { ++ type Output = crate::codegen::u128x2; ++} ++impl Shuffle<[u32; 4]> for u128 { ++ type Output = crate::codegen::u128x4; ++} ++ ++impl Shuffle<[u32; 1]> for m128 { ++ type Output = crate::codegen::m128x1; ++} ++impl Shuffle<[u32; 2]> for m128 { ++ type Output = crate::codegen::m128x2; ++} ++impl Shuffle<[u32; 4]> for m128 { ++ type Output = crate::codegen::m128x4; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs b/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs +new file mode 100644 +index 000000000000..1e9f5816371a +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs +@@ -0,0 +1,432 @@ ++//! Shuffle vector lanes with run-time indices. ++ ++use crate::*; ++ ++pub trait Shuffle1Dyn { ++ type Indices; ++ fn shuffle1_dyn(self, _: Self::Indices) -> Self; ++} ++ ++// Fallback implementation ++macro_rules! 
impl_fallback { ++ ($id:ident) => { ++ impl Shuffle1Dyn for $id { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ let mut result = Self::splat(0); ++ for i in 0..$id::lanes() { ++ result = result ++ .replace(i, self.extract(indices.extract(i) as usize)); ++ } ++ result ++ } ++ } ++ }; ++} ++ ++macro_rules! impl_shuffle1_dyn { ++ (u8x8) => { ++ cfg_if! { ++ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), ++ target_feature = "ssse3"))] { ++ impl Shuffle1Dyn for u8x8 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_shuffle_pi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_shuffle_pi8; ++ ++ unsafe { ++ crate::mem::transmute( ++ _mm_shuffle_pi8( ++ crate::mem::transmute(self.0), ++ crate::mem::transmute(indices.0) ++ ) ++ ) ++ } ++ } ++ } ++ } else if #[cfg(all( ++ any( ++ all(target_aarch = "aarch64", target_feature = "neon"), ++ all(target_aarch = "arm", target_feature = "v7", ++ target_feature = "neon") ++ ), ++ any(feature = "core_arch", libcore_neon) ++ ) ++ )] { ++ impl Shuffle1Dyn for u8x8 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ #[cfg(targt_arch = "aarch64")] ++ use crate::arch::aarch64::vtbl1_u8; ++ #[cfg(targt_arch = "arm")] ++ use crate::arch::arm::vtbl1_u8; ++ ++ // This is safe because the binary is compiled with ++ // neon enabled at compile-time and can therefore only ++ // run on CPUs that have it enabled. ++ unsafe { ++ Simd(mem::transmute( ++ vtbl1_u8(mem::transmute(self.0), ++ crate::mem::transmute(indices.0)) ++ )) ++ } ++ } ++ } ++ } else { ++ impl_fallback!(u8x8); ++ } ++ } ++ }; ++ (u8x16) => { ++ cfg_if! 
{ ++ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), ++ target_feature = "ssse3"))] { ++ impl Shuffle1Dyn for u8x16 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_shuffle_epi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_shuffle_epi8; ++ // This is safe because the binary is compiled with ++ // ssse3 enabled at compile-time and can therefore only ++ // run on CPUs that have it enabled. ++ unsafe { ++ Simd(mem::transmute( ++ _mm_shuffle_epi8(mem::transmute(self.0), ++ crate::mem::transmute(indices)) ++ )) ++ } ++ } ++ } ++ } else if #[cfg(all(target_aarch = "aarch64", target_feature = "neon", ++ any(feature = "core_arch", libcore_neon)))] { ++ impl Shuffle1Dyn for u8x16 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ use crate::arch::aarch64::vqtbl1q_u8; ++ ++ // This is safe because the binary is compiled with ++ // neon enabled at compile-time and can therefore only ++ // run on CPUs that have it enabled. ++ unsafe { ++ Simd(mem::transmute( ++ vqtbl1q_u8(mem::transmute(self.0), ++ crate::mem::transmute(indices.0)) ++ )) ++ } ++ } ++ } ++ } else if #[cfg(all(target_aarch = "arm", target_feature = "v7", ++ target_feature = "neon", ++ any(feature = "core_arch", libcore_neon)))] { ++ impl Shuffle1Dyn for u8x16 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ use crate::arch::arm::vtbl2_u8; ++ ++ // This is safe because the binary is compiled with ++ // neon enabled at compile-time and can therefore only ++ // run on CPUs that have it enabled. 
++ unsafe { ++ union U { ++ j: u8x16, ++ s: (u8x8, u8x8), ++ } ++ ++ let (i0, i1) = U { j: y }.s; ++ ++ let r0 = vtbl2_u8( ++ mem::transmute(x), ++ crate::mem::transmute(i0) ++ ); ++ let r1 = vtbl2_u8( ++ mem::transmute(x), ++ crate::mem::transmute(i1) ++ ); ++ ++ let r = U { s: (r0, r1) }.j; ++ ++ Simd(mem::transmute(r)) ++ } ++ } ++ } ++ } else { ++ impl_fallback!(u8x16); ++ } ++ } ++ }; ++ (u16x8) => { ++ impl Shuffle1Dyn for u16x8 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ let indices: u8x8 = (indices * 2).cast(); ++ let indices: u8x16 = shuffle!( ++ indices, [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7] ++ ); ++ let v = u8x16::new( ++ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 ++ ); ++ let indices = indices + v; ++ unsafe { ++ let s: u8x16 =crate::mem::transmute(self); ++ crate::mem::transmute(s.shuffle1_dyn(indices)) ++ } ++ } ++ } ++ }; ++ (u32x4) => { ++ cfg_if! { ++ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), ++ target_feature = "avx"))] { ++ impl Shuffle1Dyn for u32x4 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::{_mm_permutevar_ps}; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::{_mm_permutevar_ps}; ++ ++ unsafe { ++ crate::mem::transmute( ++ _mm_permutevar_ps( ++ crate::mem::transmute(self.0), ++ crate::mem::transmute(indices.0) ++ ) ++ ) ++ } ++ } ++ } ++ } else { ++ impl Shuffle1Dyn for u32x4 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ let indices: u8x4 = (indices * 4).cast(); ++ let indices: u8x16 = shuffle!( ++ indices, ++ [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3] ++ ); ++ let v = u8x16::new( ++ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 ++ ); ++ let indices = indices + v; ++ unsafe { ++ let s: u8x16 =crate::mem::transmute(self); ++ crate::mem::transmute(s.shuffle1_dyn(indices)) 
++ } ++ } ++ } ++ } ++ } ++ }; ++ (u64x2) => { ++ cfg_if! { ++ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), ++ target_feature = "avx"))] { ++ impl Shuffle1Dyn for u64x2 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::{_mm_permutevar_pd}; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::{_mm_permutevar_pd}; ++ // _mm_permutevar_pd uses the _second_ bit of each ++ // element to perform the selection, that is: 0b00 => 0, ++ // 0b10 => 1: ++ let indices = indices << 1; ++ unsafe { ++ crate::mem::transmute( ++ _mm_permutevar_pd( ++ crate::mem::transmute(self), ++ crate::mem::transmute(indices) ++ ) ++ ) ++ } ++ } ++ } ++ } else { ++ impl Shuffle1Dyn for u64x2 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ let indices: u8x2 = (indices * 8).cast(); ++ let indices: u8x16 = shuffle!( ++ indices, ++ [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ); ++ let v = u8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 ++ ); ++ let indices = indices + v; ++ unsafe { ++ let s: u8x16 =crate::mem::transmute(self); ++ crate::mem::transmute(s.shuffle1_dyn(indices)) ++ } ++ } ++ } ++ } ++ } ++ }; ++ (u128x1) => { ++ impl Shuffle1Dyn for u128x1 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, _indices: Self::Indices) -> Self { ++ self ++ } ++ } ++ }; ++ ($id:ident) => { impl_fallback!($id); } ++} ++ ++impl_shuffle1_dyn!(u8x2); ++impl_shuffle1_dyn!(u8x4); ++impl_shuffle1_dyn!(u8x8); ++impl_shuffle1_dyn!(u8x16); ++impl_shuffle1_dyn!(u8x32); ++impl_shuffle1_dyn!(u8x64); ++ ++impl_shuffle1_dyn!(u16x2); ++impl_shuffle1_dyn!(u16x4); ++impl_shuffle1_dyn!(u16x8); ++impl_shuffle1_dyn!(u16x16); ++impl_shuffle1_dyn!(u16x32); ++ ++impl_shuffle1_dyn!(u32x2); ++impl_shuffle1_dyn!(u32x4); ++impl_shuffle1_dyn!(u32x8); ++impl_shuffle1_dyn!(u32x16); ++ ++impl_shuffle1_dyn!(u64x2); 
++impl_shuffle1_dyn!(u64x4); ++impl_shuffle1_dyn!(u64x8); ++ ++impl_shuffle1_dyn!(usizex2); ++impl_shuffle1_dyn!(usizex4); ++impl_shuffle1_dyn!(usizex8); ++ ++impl_shuffle1_dyn!(u128x1); ++impl_shuffle1_dyn!(u128x2); ++impl_shuffle1_dyn!(u128x4); ++ ++// Implementation for non-unsigned vector types ++macro_rules! impl_shuffle1_dyn_non_u { ++ ($id:ident, $uid:ident) => { ++ impl Shuffle1Dyn for $id { ++ type Indices = $uid; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ unsafe { ++ let u: $uid = crate::mem::transmute(self); ++ crate::mem::transmute(u.shuffle1_dyn(indices)) ++ } ++ } ++ } ++ }; ++} ++ ++impl_shuffle1_dyn_non_u!(i8x2, u8x2); ++impl_shuffle1_dyn_non_u!(i8x4, u8x4); ++impl_shuffle1_dyn_non_u!(i8x8, u8x8); ++impl_shuffle1_dyn_non_u!(i8x16, u8x16); ++impl_shuffle1_dyn_non_u!(i8x32, u8x32); ++impl_shuffle1_dyn_non_u!(i8x64, u8x64); ++ ++impl_shuffle1_dyn_non_u!(i16x2, u16x2); ++impl_shuffle1_dyn_non_u!(i16x4, u16x4); ++impl_shuffle1_dyn_non_u!(i16x8, u16x8); ++impl_shuffle1_dyn_non_u!(i16x16, u16x16); ++impl_shuffle1_dyn_non_u!(i16x32, u16x32); ++ ++impl_shuffle1_dyn_non_u!(i32x2, u32x2); ++impl_shuffle1_dyn_non_u!(i32x4, u32x4); ++impl_shuffle1_dyn_non_u!(i32x8, u32x8); ++impl_shuffle1_dyn_non_u!(i32x16, u32x16); ++ ++impl_shuffle1_dyn_non_u!(i64x2, u64x2); ++impl_shuffle1_dyn_non_u!(i64x4, u64x4); ++impl_shuffle1_dyn_non_u!(i64x8, u64x8); ++ ++impl_shuffle1_dyn_non_u!(isizex2, usizex2); ++impl_shuffle1_dyn_non_u!(isizex4, usizex4); ++impl_shuffle1_dyn_non_u!(isizex8, usizex8); ++ ++impl_shuffle1_dyn_non_u!(i128x1, u128x1); ++impl_shuffle1_dyn_non_u!(i128x2, u128x2); ++impl_shuffle1_dyn_non_u!(i128x4, u128x4); ++ ++impl_shuffle1_dyn_non_u!(m8x2, u8x2); ++impl_shuffle1_dyn_non_u!(m8x4, u8x4); ++impl_shuffle1_dyn_non_u!(m8x8, u8x8); ++impl_shuffle1_dyn_non_u!(m8x16, u8x16); ++impl_shuffle1_dyn_non_u!(m8x32, u8x32); ++impl_shuffle1_dyn_non_u!(m8x64, u8x64); ++ ++impl_shuffle1_dyn_non_u!(m16x2, u16x2); 
++impl_shuffle1_dyn_non_u!(m16x4, u16x4); ++impl_shuffle1_dyn_non_u!(m16x8, u16x8); ++impl_shuffle1_dyn_non_u!(m16x16, u16x16); ++impl_shuffle1_dyn_non_u!(m16x32, u16x32); ++ ++impl_shuffle1_dyn_non_u!(m32x2, u32x2); ++impl_shuffle1_dyn_non_u!(m32x4, u32x4); ++impl_shuffle1_dyn_non_u!(m32x8, u32x8); ++impl_shuffle1_dyn_non_u!(m32x16, u32x16); ++ ++impl_shuffle1_dyn_non_u!(m64x2, u64x2); ++impl_shuffle1_dyn_non_u!(m64x4, u64x4); ++impl_shuffle1_dyn_non_u!(m64x8, u64x8); ++ ++impl_shuffle1_dyn_non_u!(msizex2, usizex2); ++impl_shuffle1_dyn_non_u!(msizex4, usizex4); ++impl_shuffle1_dyn_non_u!(msizex8, usizex8); ++ ++impl_shuffle1_dyn_non_u!(m128x1, u128x1); ++impl_shuffle1_dyn_non_u!(m128x2, u128x2); ++impl_shuffle1_dyn_non_u!(m128x4, u128x4); ++ ++impl_shuffle1_dyn_non_u!(f32x2, u32x2); ++impl_shuffle1_dyn_non_u!(f32x4, u32x4); ++impl_shuffle1_dyn_non_u!(f32x8, u32x8); ++impl_shuffle1_dyn_non_u!(f32x16, u32x16); ++ ++impl_shuffle1_dyn_non_u!(f64x2, u64x2); ++impl_shuffle1_dyn_non_u!(f64x4, u64x4); ++impl_shuffle1_dyn_non_u!(f64x8, u64x8); ++ ++// Implementation for non-unsigned vector types ++macro_rules! impl_shuffle1_dyn_ptr { ++ ($id:ident, $uid:ident) => { ++ impl Shuffle1Dyn for $id { ++ type Indices = $uid; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ unsafe { ++ let u: $uid = crate::mem::transmute(self); ++ crate::mem::transmute(u.shuffle1_dyn(indices)) ++ } ++ } ++ } ++ }; ++} ++ ++impl_shuffle1_dyn_ptr!(cptrx2, usizex2); ++impl_shuffle1_dyn_ptr!(cptrx4, usizex4); ++impl_shuffle1_dyn_ptr!(cptrx8, usizex8); ++ ++impl_shuffle1_dyn_ptr!(mptrx2, usizex2); ++impl_shuffle1_dyn_ptr!(mptrx4, usizex4); ++impl_shuffle1_dyn_ptr!(mptrx8, usizex8); +diff --git a/third_party/rust/packed_simd/src/codegen/swap_bytes.rs b/third_party/rust/packed_simd/src/codegen/swap_bytes.rs +new file mode 100644 +index 000000000000..b435fb5da120 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/swap_bytes.rs +@@ -0,0 +1,189 @@ ++//! 
Horizontal swap bytes reductions. ++ ++// FIXME: investigate using `llvm.bswap` ++// https://github.com/rust-lang-nursery/packed_simd/issues/19 ++ ++use crate::*; ++ ++crate trait SwapBytes { ++ fn swap_bytes(self) -> Self; ++} ++ ++macro_rules! impl_swap_bytes { ++ (v16: $($id:ident,)+) => { ++ $( ++ impl SwapBytes for $id { ++ #[inline] ++ fn swap_bytes(self) -> Self { ++ unsafe { shuffle!(self, [1, 0]) } ++ } ++ } ++ )+ ++ }; ++ (v32: $($id:ident,)+) => { ++ $( ++ impl SwapBytes for $id { ++ #[inline] ++ #[allow(clippy::useless_transmute)] ++ fn swap_bytes(self) -> Self { ++ unsafe { ++ let bytes: u8x4 = crate::mem::transmute(self); ++ let result: u8x4 = shuffle!(bytes, [3, 2, 1, 0]); ++ crate::mem::transmute(result) ++ } ++ } ++ } ++ )+ ++ }; ++ (v64: $($id:ident,)+) => { ++ $( ++ impl SwapBytes for $id { ++ #[inline] ++ #[allow(clippy::useless_transmute)] ++ fn swap_bytes(self) -> Self { ++ unsafe { ++ let bytes: u8x8 = crate::mem::transmute(self); ++ let result: u8x8 = shuffle!( ++ bytes, [7, 6, 5, 4, 3, 2, 1, 0] ++ ); ++ crate::mem::transmute(result) ++ } ++ } ++ } ++ )+ ++ }; ++ (v128: $($id:ident,)+) => { ++ $( ++ impl SwapBytes for $id { ++ #[inline] ++ #[allow(clippy::useless_transmute)] ++ fn swap_bytes(self) -> Self { ++ unsafe { ++ let bytes: u8x16 = crate::mem::transmute(self); ++ let result: u8x16 = shuffle!(bytes, [ ++ 15, 14, 13, 12, 11, 10, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ]); ++ crate::mem::transmute(result) ++ } ++ } ++ } ++ )+ ++ }; ++ (v256: $($id:ident,)+) => { ++ $( ++ impl SwapBytes for $id { ++ #[inline] ++ #[allow(clippy::useless_transmute)] ++ fn swap_bytes(self) -> Self { ++ unsafe { ++ let bytes: u8x32 = crate::mem::transmute(self); ++ let result: u8x32 = shuffle!(bytes, [ ++ 31, 30, 29, 28, 27, 26, 25, 24, ++ 23, 22, 21, 20, 19, 18, 17, 16, ++ 15, 14, 13, 12, 11, 10, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ]); ++ crate::mem::transmute(result) ++ } ++ } ++ } ++ )+ ++ }; ++ (v512: $($id:ident,)+) => { ++ $( ++ impl SwapBytes for $id { ++ 
#[inline] ++ #[allow(clippy::useless_transmute)] ++ fn swap_bytes(self) -> Self { ++ unsafe { ++ let bytes: u8x64 = crate::mem::transmute(self); ++ let result: u8x64 = shuffle!(bytes, [ ++ 63, 62, 61, 60, 59, 58, 57, 56, ++ 55, 54, 53, 52, 51, 50, 49, 48, ++ 47, 46, 45, 44, 43, 42, 41, 40, ++ 39, 38, 37, 36, 35, 34, 33, 32, ++ 31, 30, 29, 28, 27, 26, 25, 24, ++ 23, 22, 21, 20, 19, 18, 17, 16, ++ 15, 14, 13, 12, 11, 10, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ]); ++ crate::mem::transmute(result) ++ } ++ } ++ } ++ )+ ++ }; ++} ++ ++impl_swap_bytes!(v16: u8x2, i8x2,); ++impl_swap_bytes!(v32: u8x4, i8x4, u16x2, i16x2,); ++// FIXME: 64-bit single element vector ++impl_swap_bytes!( ++ v64: u8x8, ++ i8x8, ++ u16x4, ++ i16x4, ++ u32x2, ++ i32x2, /* u64x1, i64x1, */ ++); ++ ++impl_swap_bytes!( ++ v128: u8x16, ++ i8x16, ++ u16x8, ++ i16x8, ++ u32x4, ++ i32x4, ++ u64x2, ++ i64x2, ++ u128x1, ++ i128x1, ++); ++impl_swap_bytes!( ++ v256: u8x32, ++ i8x32, ++ u16x16, ++ i16x16, ++ u32x8, ++ i32x8, ++ u64x4, ++ i64x4, ++ u128x2, ++ i128x2, ++); ++ ++impl_swap_bytes!( ++ v512: u8x64, ++ i8x64, ++ u16x32, ++ i16x32, ++ u32x16, ++ i32x16, ++ u64x8, ++ i64x8, ++ u128x4, ++ i128x4, ++); ++ ++cfg_if! 
{ ++ if #[cfg(target_pointer_width = "8")] { ++ impl_swap_bytes!(v16: isizex2, usizex2,); ++ impl_swap_bytes!(v32: isizex4, usizex4,); ++ impl_swap_bytes!(v64: isizex8, usizex8,); ++ } else if #[cfg(target_pointer_width = "16")] { ++ impl_swap_bytes!(v32: isizex2, usizex2,); ++ impl_swap_bytes!(v64: isizex4, usizex4,); ++ impl_swap_bytes!(v128: isizex8, usizex8,); ++ } else if #[cfg(target_pointer_width = "32")] { ++ impl_swap_bytes!(v64: isizex2, usizex2,); ++ impl_swap_bytes!(v128: isizex4, usizex4,); ++ impl_swap_bytes!(v256: isizex8, usizex8,); ++ } else if #[cfg(target_pointer_width = "64")] { ++ impl_swap_bytes!(v128: isizex2, usizex2,); ++ impl_swap_bytes!(v256: isizex4, usizex4,); ++ impl_swap_bytes!(v512: isizex8, usizex8,); ++ } else { ++ compile_error!("unsupported target_pointer_width"); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/v128.rs b/third_party/rust/packed_simd/src/codegen/v128.rs +new file mode 100644 +index 000000000000..9506424fadad +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/v128.rs +@@ -0,0 +1,46 @@ ++//! 
Internal 128-bit wide vector types ++ ++use crate::masks::*; ++ ++#[rustfmt::skip] ++impl_simd_array!( ++ [i8; 16]: i8x16 | ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [u8; 16]: u8x16 | ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [m8; 16]: m8x16 | ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8 ++); ++ ++impl_simd_array!([i16; 8]: i16x8 | i16, i16, i16, i16, i16, i16, i16, i16); ++impl_simd_array!([u16; 8]: u16x8 | u16, u16, u16, u16, u16, u16, u16, u16); ++impl_simd_array!([m16; 8]: m16x8 | i16, i16, i16, i16, i16, i16, i16, i16); ++ ++impl_simd_array!([i32; 4]: i32x4 | i32, i32, i32, i32); ++impl_simd_array!([u32; 4]: u32x4 | u32, u32, u32, u32); ++impl_simd_array!([f32; 4]: f32x4 | f32, f32, f32, f32); ++impl_simd_array!([m32; 4]: m32x4 | i32, i32, i32, i32); ++ ++impl_simd_array!([i64; 2]: i64x2 | i64, i64); ++impl_simd_array!([u64; 2]: u64x2 | u64, u64); ++impl_simd_array!([f64; 2]: f64x2 | f64, f64); ++impl_simd_array!([m64; 2]: m64x2 | i64, i64); ++ ++impl_simd_array!([i128; 1]: i128x1 | i128); ++impl_simd_array!([u128; 1]: u128x1 | u128); ++impl_simd_array!([m128; 1]: m128x1 | i128); +diff --git a/third_party/rust/packed_simd/src/codegen/v16.rs b/third_party/rust/packed_simd/src/codegen/v16.rs +new file mode 100644 +index 000000000000..4d55a6d8998e +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/v16.rs +@@ -0,0 +1,7 @@ ++//! 
Internal 16-bit wide vector types ++ ++use crate::masks::*; ++ ++impl_simd_array!([i8; 2]: i8x2 | i8, i8); ++impl_simd_array!([u8; 2]: u8x2 | u8, u8); ++impl_simd_array!([m8; 2]: m8x2 | i8, i8); +diff --git a/third_party/rust/packed_simd/src/codegen/v256.rs b/third_party/rust/packed_simd/src/codegen/v256.rs +new file mode 100644 +index 000000000000..5ca4759f0c0a +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/v256.rs +@@ -0,0 +1,78 @@ ++//! Internal 256-bit wide vector types ++ ++use crate::masks::*; ++ ++#[rustfmt::skip] ++impl_simd_array!( ++ [i8; 32]: i8x32 | ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [u8; 32]: u8x32 | ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [m8; 32]: m8x32 | ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [i16; 16]: i16x16 | ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [u16; 16]: u16x16 | ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [m16; 16]: m16x16 | ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16 ++); ++ ++impl_simd_array!([i32; 8]: i32x8 | i32, i32, i32, i32, i32, i32, i32, i32); ++impl_simd_array!([u32; 8]: u32x8 | u32, u32, u32, u32, u32, u32, u32, u32); ++impl_simd_array!([f32; 8]: f32x8 | f32, f32, f32, f32, f32, f32, f32, f32); ++impl_simd_array!([m32; 8]: m32x8 | i32, i32, i32, i32, i32, i32, i32, i32); ++ ++impl_simd_array!([i64; 4]: i64x4 
| i64, i64, i64, i64); ++impl_simd_array!([u64; 4]: u64x4 | u64, u64, u64, u64); ++impl_simd_array!([f64; 4]: f64x4 | f64, f64, f64, f64); ++impl_simd_array!([m64; 4]: m64x4 | i64, i64, i64, i64); ++ ++impl_simd_array!([i128; 2]: i128x2 | i128, i128); ++impl_simd_array!([u128; 2]: u128x2 | u128, u128); ++impl_simd_array!([m128; 2]: m128x2 | i128, i128); +diff --git a/third_party/rust/packed_simd/src/codegen/v32.rs b/third_party/rust/packed_simd/src/codegen/v32.rs +new file mode 100644 +index 000000000000..ae1dabd00c22 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/v32.rs +@@ -0,0 +1,11 @@ ++//! Internal 32-bit wide vector types ++ ++use crate::masks::*; ++ ++impl_simd_array!([i8; 4]: i8x4 | i8, i8, i8, i8); ++impl_simd_array!([u8; 4]: u8x4 | u8, u8, u8, u8); ++impl_simd_array!([m8; 4]: m8x4 | i8, i8, i8, i8); ++ ++impl_simd_array!([i16; 2]: i16x2 | i16, i16); ++impl_simd_array!([u16; 2]: u16x2 | u16, u16); ++impl_simd_array!([m16; 2]: m16x2 | i16, i16); +diff --git a/third_party/rust/packed_simd/src/codegen/v512.rs b/third_party/rust/packed_simd/src/codegen/v512.rs +new file mode 100644 +index 000000000000..bf95110340d6 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/v512.rs +@@ -0,0 +1,145 @@ ++//! 
Internal 512-bit wide vector types ++ ++use crate::masks::*; ++ ++#[rustfmt::skip] ++impl_simd_array!( ++ [i8; 64]: i8x64 | ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [u8; 64]: u8x64 | ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [m8; 64]: m8x64 | ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [i16; 32]: i16x32 | ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [u16; 32]: u16x32 | ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [m16; 32]: m16x32 | ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16 ++); ++ ++#[rustfmt::skip] ++impl_simd_array!( ++ [i32; 16]: i32x16 | ++ i32, 
i32, i32, i32, ++ i32, i32, i32, i32, ++ i32, i32, i32, i32, ++ i32, i32, i32, i32 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [u32; 16]: u32x16 | ++ u32, u32, u32, u32, ++ u32, u32, u32, u32, ++ u32, u32, u32, u32, ++ u32, u32, u32, u32 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [f32; 16]: f32x16 | ++ f32, f32, f32, f32, ++ f32, f32, f32, f32, ++ f32, f32, f32, f32, ++ f32, f32, f32, f32 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [m32; 16]: m32x16 | ++ i32, i32, i32, i32, ++ i32, i32, i32, i32, ++ i32, i32, i32, i32, ++ i32, i32, i32, i32 ++); ++ ++impl_simd_array!([i64; 8]: i64x8 | i64, i64, i64, i64, i64, i64, i64, i64); ++impl_simd_array!([u64; 8]: u64x8 | u64, u64, u64, u64, u64, u64, u64, u64); ++impl_simd_array!([f64; 8]: f64x8 | f64, f64, f64, f64, f64, f64, f64, f64); ++impl_simd_array!([m64; 8]: m64x8 | i64, i64, i64, i64, i64, i64, i64, i64); ++ ++impl_simd_array!([i128; 4]: i128x4 | i128, i128, i128, i128); ++impl_simd_array!([u128; 4]: u128x4 | u128, u128, u128, u128); ++impl_simd_array!([m128; 4]: m128x4 | i128, i128, i128, i128); +diff --git a/third_party/rust/packed_simd/src/codegen/v64.rs b/third_party/rust/packed_simd/src/codegen/v64.rs +new file mode 100644 +index 000000000000..3cfb67c1a013 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/v64.rs +@@ -0,0 +1,21 @@ ++//! 
Internal 64-bit wide vector types ++ ++use crate::masks::*; ++ ++impl_simd_array!([i8; 8]: i8x8 | i8, i8, i8, i8, i8, i8, i8, i8); ++impl_simd_array!([u8; 8]: u8x8 | u8, u8, u8, u8, u8, u8, u8, u8); ++impl_simd_array!([m8; 8]: m8x8 | i8, i8, i8, i8, i8, i8, i8, i8); ++ ++impl_simd_array!([i16; 4]: i16x4 | i16, i16, i16, i16); ++impl_simd_array!([u16; 4]: u16x4 | u16, u16, u16, u16); ++impl_simd_array!([m16; 4]: m16x4 | i16, i16, i16, i16); ++ ++impl_simd_array!([i32; 2]: i32x2 | i32, i32); ++impl_simd_array!([u32; 2]: u32x2 | u32, u32); ++impl_simd_array!([f32; 2]: f32x2 | f32, f32); ++impl_simd_array!([m32; 2]: m32x2 | i32, i32); ++ ++impl_simd_array!([i64; 1]: i64x1 | i64); ++impl_simd_array!([u64; 1]: u64x1 | u64); ++impl_simd_array!([f64; 1]: f64x1 | f64); ++impl_simd_array!([m64; 1]: m64x1 | i64); +diff --git a/third_party/rust/packed_simd/src/codegen/vPtr.rs b/third_party/rust/packed_simd/src/codegen/vPtr.rs +new file mode 100644 +index 000000000000..1f2bc7714dd9 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/vPtr.rs +@@ -0,0 +1,33 @@ ++//! Pointer vector types ++ ++macro_rules! 
impl_simd_ptr { ++ ([$ptr_ty:ty; $elem_count:expr]: $tuple_id:ident | $ty:ident ++ | $($tys:ty),*) => { ++ #[derive(Copy, Clone)] ++ #[repr(simd)] ++ pub struct $tuple_id<$ty>($(crate $tys),*); ++ //^^^^^^^ leaked through SimdArray ++ ++ impl<$ty> crate::sealed::SimdArray for [$ptr_ty; $elem_count] { ++ type Tuple = $tuple_id<$ptr_ty>; ++ type T = $ptr_ty; ++ const N: usize = $elem_count; ++ type NT = [u32; $elem_count]; ++ } ++ ++ impl<$ty> crate::sealed::Simd for $tuple_id<$ptr_ty> { ++ type Element = $ptr_ty; ++ const LANES: usize = $elem_count; ++ type LanesType = [u32; $elem_count]; ++ } ++ ++ } ++} ++ ++impl_simd_ptr!([*const T; 2]: cptrx2 | T | T, T); ++impl_simd_ptr!([*const T; 4]: cptrx4 | T | T, T, T, T); ++impl_simd_ptr!([*const T; 8]: cptrx8 | T | T, T, T, T, T, T, T, T); ++ ++impl_simd_ptr!([*mut T; 2]: mptrx2 | T | T, T); ++impl_simd_ptr!([*mut T; 4]: mptrx4 | T | T, T, T, T); ++impl_simd_ptr!([*mut T; 8]: mptrx8 | T | T, T, T, T, T, T, T, T); +diff --git a/third_party/rust/packed_simd/src/codegen/vSize.rs b/third_party/rust/packed_simd/src/codegen/vSize.rs +new file mode 100644 +index 000000000000..3911b21340c8 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/vSize.rs +@@ -0,0 +1,43 @@ ++//! 
Vector types with pointer-sized elements ++ ++use crate::codegen::pointer_sized_int::{isize_, usize_}; ++use crate::masks::*; ++ ++impl_simd_array!([isize; 2]: isizex2 | isize_, isize_); ++impl_simd_array!([usize; 2]: usizex2 | usize_, usize_); ++impl_simd_array!([msize; 2]: msizex2 | isize_, isize_); ++ ++impl_simd_array!([isize; 4]: isizex4 | isize_, isize_, isize_, isize_); ++impl_simd_array!([usize; 4]: usizex4 | usize_, usize_, usize_, usize_); ++impl_simd_array!([msize; 4]: msizex4 | isize_, isize_, isize_, isize_); ++ ++impl_simd_array!( ++ [isize; 8]: isizex8 | isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_ ++); ++impl_simd_array!( ++ [usize; 8]: usizex8 | usize_, ++ usize_, ++ usize_, ++ usize_, ++ usize_, ++ usize_, ++ usize_, ++ usize_ ++); ++impl_simd_array!( ++ [msize; 8]: msizex8 | isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_ ++); +diff --git a/third_party/rust/packed_simd/src/lib.rs b/third_party/rust/packed_simd/src/lib.rs +new file mode 100644 +index 000000000000..d73645e72fbe +--- /dev/null ++++ b/third_party/rust/packed_simd/src/lib.rs +@@ -0,0 +1,327 @@ ++//! # Portable packed SIMD vectors ++//! ++//! This crate is proposed for stabilization as `std::packed_simd` in [RFC2366: ++//! `std::simd`](https://github.com/rust-lang/rfcs/pull/2366) . ++//! ++//! The examples available in the ++//! [`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples) ++//! sub-directory of the crate showcase how to use the library in practice. ++//! ++//! ## Table of contents ++//! ++//! - [Introduction](#introduction) ++//! - [Vector types](#vector-types) ++//! - [Conditional operations](#conditional-operations) ++//! - [Conversions](#conversions) ++//! - [Performance ++//! guide](https://rust-lang-nursery.github.io/packed_simd/perf-guide/) ++//! ++//! ## Introduction ++//! ++//! This crate exports [`Simd<[T; N]>`][`Simd`]: a packed vector of `N` ++//! 
elements of type `T` as well as many type aliases for this type: for ++//! example, [`f32x4`], which is just an alias for `Simd<[f32; 4]>`. ++//! ++//! The operations on packed vectors are, by default, "vertical", that is, they ++//! are applied to each vector lane in isolation of the others: ++//! ++//! ``` ++//! # use packed_simd::*; ++//! let a = i32x4::new(1, 2, 3, 4); ++//! let b = i32x4::new(5, 6, 7, 8); ++//! assert_eq!(a + b, i32x4::new(6, 8, 10, 12)); ++//! ``` ++//! ++//! Many "horizontal" operations are also provided: ++//! ++//! ``` ++//! # use packed_simd::*; ++//! # let a = i32x4::new(1, 2, 3, 4); ++//! assert_eq!(a.wrapping_sum(), 10); ++//! ``` ++//! ++//! In virtually all architectures vertical operations are fast, while ++//! horizontal operations are, by comparison, much slower. That is, the ++//! most portably-efficient way of performing a reduction over a slice ++//! is to collect the results into a vector using vertical operations, ++//! and performing a single horizontal operation at the end: ++//! ++//! ``` ++//! # use packed_simd::*; ++//! fn reduce(x: &[i32]) -> i32 { ++//! assert!(x.len() % 4 == 0); ++//! let mut sum = i32x4::splat(0); // [0, 0, 0, 0] ++//! for i in (0..x.len()).step_by(4) { ++//! sum += i32x4::from_slice_unaligned(&x[i..]); ++//! } ++//! sum.wrapping_sum() ++//! } ++//! ++//! let x = [0, 1, 2, 3, 4, 5, 6, 7]; ++//! assert_eq!(reduce(&x), 28); ++//! ``` ++//! ++//! ## Vector types ++//! ++//! The vector type aliases are named according to the following scheme: ++//! ++//! > `{element_type}x{number_of_lanes} == Simd<[element_type; ++//! number_of_lanes]>` ++//! ++//! where the following element types are supported: ++//! ++//! * `i{element_width}`: signed integer ++//! * `u{element_width}`: unsigned integer ++//! * `f{element_width}`: float ++//! * `m{element_width}`: mask (see below) ++//! * `*{const,mut} T`: `const` and `mut` pointers ++//! ++//! ## Basic operations ++//! ++//! ``` ++//! # use packed_simd::*; ++//! 
// Sets all elements to `0`: ++//! let a = i32x4::splat(0); ++//! ++//! // Reads a vector from a slice: ++//! let mut arr = [0, 0, 0, 1, 2, 3, 4, 5]; ++//! let b = i32x4::from_slice_unaligned(&arr); ++//! ++//! // Reads the 4-th element of a vector: ++//! assert_eq!(b.extract(3), 1); ++//! ++//! // Returns a new vector where the 4-th element is replaced with `1`: ++//! let a = a.replace(3, 1); ++//! assert_eq!(a, b); ++//! ++//! // Writes a vector to a slice: ++//! let a = a.replace(2, 1); ++//! a.write_to_slice_unaligned(&mut arr[4..]); ++//! assert_eq!(arr, [0, 0, 0, 1, 0, 0, 1, 1]); ++//! ``` ++//! ++//! ## Conditional operations ++//! ++//! One often needs to perform an operation on some lanes of the vector. Vector ++//! masks, like `m32x4`, allow selecting on which vector lanes an operation is ++//! to be performed: ++//! ++//! ``` ++//! # use packed_simd::*; ++//! let a = i32x4::new(1, 1, 2, 2); ++//! ++//! // Add `1` to the first two lanes of the vector. ++//! let m = m16x4::new(true, true, false, false); ++//! let a = m.select(a + 1, a); ++//! assert_eq!(a, i32x4::splat(2)); ++//! ``` ++//! ++//! The elements of a vector mask are either `true` or `false`. Here `true` ++//! means that a lane is "selected", while `false` means that a lane is not ++//! selected. ++//! ++//! All vector masks implement a `mask.select(a: T, b: T) -> T` method that ++//! works on all vectors that have the same number of lanes as the mask. The ++//! resulting vector contains the elements of `a` for those lanes for which the ++//! mask is `true`, and the elements of `b` otherwise. ++//! ++//! The example constructs a mask with the first two lanes set to `true` and ++//! the last two lanes set to `false`. This selects the first two lanes of `a + ++//! 1` and the last two lanes of `a`, producing a vector where the first two ++//! lanes have been incremented by `1`. ++//! ++//! > note: mask `select` can be used on vector types that have the same number ++//! > of lanes as the mask. 
The example shows this by using [`m16x4`] instead ++//! > of [`m32x4`]. It is _typically_ more performant to use a mask element ++//! > width equal to the element width of the vectors being operated upon. ++//! > This is, however, not true for 512-bit wide vectors when targeting ++//! > AVX-512, where the most efficient masks use only 1-bit per element. ++//! ++//! All vertical comparison operations return masks: ++//! ++//! ``` ++//! # use packed_simd::*; ++//! let a = i32x4::new(1, 1, 3, 3); ++//! let b = i32x4::new(2, 2, 0, 0); ++//! ++//! // ge: >= (Greater Equal; see also lt, le, gt, eq, ne). ++//! let m = a.ge(i32x4::splat(2)); ++//! ++//! if m.any() { ++//! // all / any / none allow coherent control flow ++//! let d = m.select(a, b); ++//! assert_eq!(d, i32x4::new(2, 2, 3, 3)); ++//! } ++//! ``` ++//! ++//! ## Conversions ++//! ++//! * **lossless widening conversions**: [`From`]/[`Into`] are implemented for ++//! vectors with the same number of lanes when the conversion is value ++//! preserving (same as in `std`). ++//! ++//! * **safe bitwise conversions**: The cargo feature `into_bits` provides the ++//! `IntoBits/FromBits` traits (`x.into_bits()`). These perform safe bitwise ++//! `transmute`s when all bit patterns of the source type are valid bit ++//! patterns of the target type and are also implemented for the ++//! architecture-specific vector types of `std::arch`. For example, `let x: ++//! u8x8 = m8x8::splat(true).into_bits();` is provided because all `m8x8` bit ++//! patterns are valid `u8x8` bit patterns. However, the opposite is not ++//! true, not all `u8x8` bit patterns are valid `m8x8` bit-patterns, so this ++//! operation cannot be performed safely using `x.into_bits()`; one needs to ++//! use `unsafe { crate::mem::transmute(x) }` for that, making sure that the ++//! value in the `u8x8` is a valid bit-pattern of `m8x8`. ++//! ++//! * **numeric casts** (`as`): are performed using [`FromCast`]/[`Cast`] ++//! (`x.cast()`), just like `as`: ++//! 
++//! * casting integer vectors whose lane types have the same size (e.g. ++//! `i32xN` -> `u32xN`) is a **no-op**, ++//! ++//! * casting from a larger integer to a smaller integer (e.g. `u32xN` -> ++//! `u8xN`) will **truncate**, ++//! ++//! * casting from a smaller integer to a larger integer (e.g. `u8xN` -> ++//! `u32xN`) will: ++//! * **zero-extend** if the source is unsigned, or ++//! * **sign-extend** if the source is signed, ++//! ++//! * casting from a float to an integer will **round the float towards ++//! zero**, ++//! ++//! * casting from an integer to float will produce the floating point ++//! representation of the integer, **rounding to nearest, ties to even**, ++//! ++//! * casting from an `f32` to an `f64` is perfect and lossless, ++//! ++//! * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. ++//! ++//! Numeric casts are not very "precise": sometimes lossy, sometimes value ++//! preserving, etc. ++ ++#![feature( ++ repr_simd, ++ const_fn, ++ platform_intrinsics, ++ stdsimd, ++ aarch64_target_feature, ++ arm_target_feature, ++ link_llvm_intrinsics, ++ core_intrinsics, ++ stmt_expr_attributes, ++ align_offset, ++ mmx_target_feature, ++ crate_visibility_modifier, ++ custom_inner_attributes ++)] ++#![allow(non_camel_case_types, non_snake_case, ++ clippy::cast_possible_truncation, ++ clippy::cast_lossless, ++ clippy::cast_possible_wrap, ++ clippy::cast_precision_loss, ++ // This lint is currently broken for generic code ++ // See https://github.com/rust-lang/rust-clippy/issues/3410 ++ clippy::use_self ++)] ++#![cfg_attr(test, feature(hashmap_internals))] ++#![deny(warnings, rust_2018_idioms, clippy::missing_inline_in_public_items)] ++#![no_std] ++ ++use cfg_if::cfg_if; ++ ++cfg_if! 
{ ++ if #[cfg(feature = "core_arch")] { ++ #[allow(unused_imports)] ++ use core_arch as arch; ++ } else { ++ #[allow(unused_imports)] ++ use core::arch; ++ } ++} ++ ++#[cfg(all(target_arch = "wasm32", test))] ++use wasm_bindgen_test::*; ++ ++#[allow(unused_imports)] ++use core::{ ++ /* arch (handled above), */ cmp, f32, f64, fmt, hash, hint, i128, ++ i16, i32, i64, i8, intrinsics, isize, iter, marker, mem, ops, ptr, slice, ++ u128, u16, u32, u64, u8, usize, ++}; ++ ++#[macro_use] ++mod testing; ++#[macro_use] ++mod api; ++mod codegen; ++mod sealed; ++ ++/// Packed SIMD vector type. ++/// ++/// # Examples ++/// ++/// ``` ++/// # use packed_simd::Simd; ++/// let v = Simd::<[i32; 4]>::new(0, 1, 2, 3); ++/// assert_eq!(v.extract(2), 2); ++/// ``` ++#[repr(transparent)] ++#[derive(Copy, Clone)] ++pub struct Simd( ++ // FIXME: this type should be private, ++ // but it currently must be public for the ++ // `shuffle!` macro to work: it needs to ++ // access the internal `repr(simd)` type ++ // to call the shuffle intrinsics. ++ #[doc(hidden)] pub ::Tuple, ++); ++ ++/// Wrapper over `T` implementing a lexicoraphical order via the `PartialOrd` ++/// and/or `Ord` traits. ++#[repr(transparent)] ++#[derive(Copy, Clone, Debug)] ++#[allow(clippy::missing_inline_in_public_items)] ++pub struct LexicographicallyOrdered(T); ++ ++mod masks; ++pub use self::masks::*; ++ ++mod v16; ++pub use self::v16::*; ++ ++mod v32; ++pub use self::v32::*; ++ ++mod v64; ++pub use self::v64::*; ++ ++mod v128; ++pub use self::v128::*; ++ ++mod v256; ++pub use self::v256::*; ++ ++mod v512; ++pub use self::v512::*; ++ ++mod vSize; ++pub use self::vSize::*; ++ ++mod vPtr; ++pub use self::vPtr::*; ++ ++pub use self::api::cast::*; ++ ++#[cfg(feature = "into_bits")] ++pub use self::api::into_bits::*; ++ ++// Re-export the shuffle intrinsics required by the `shuffle!` macro. 
++#[doc(hidden)] ++pub use self::codegen::llvm::{ ++ __shuffle_vector16, __shuffle_vector2, __shuffle_vector32, ++ __shuffle_vector4, __shuffle_vector64, __shuffle_vector8, ++}; ++ ++crate mod llvm { ++ crate use crate::codegen::llvm::*; ++} +diff --git a/third_party/rust/packed_simd/src/masks.rs b/third_party/rust/packed_simd/src/masks.rs +new file mode 100644 +index 000000000000..f83c4da95750 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/masks.rs +@@ -0,0 +1,128 @@ ++//! Mask types ++ ++macro_rules! impl_mask_ty { ++ ($id:ident : $elem_ty:ident | #[$doc:meta]) => { ++ #[$doc] ++ #[derive(Copy, Clone)] ++ pub struct $id($elem_ty); ++ ++ impl crate::sealed::Mask for $id { ++ fn test(&self) -> bool { ++ $id::test(self) ++ } ++ } ++ ++ impl $id { ++ /// Instantiate a mask with `value` ++ #[inline] ++ pub fn new(x: bool) -> Self { ++ if x { ++ $id(!0) ++ } else { ++ $id(0) ++ } ++ } ++ /// Test if the mask is set ++ #[inline] ++ pub fn test(&self) -> bool { ++ self.0 != 0 ++ } ++ } ++ ++ impl Default for $id { ++ #[inline] ++ fn default() -> Self { ++ $id(0) ++ } ++ } ++ ++ #[allow(clippy::partialeq_ne_impl)] ++ impl PartialEq<$id> for $id { ++ #[inline] ++ fn eq(&self, other: &Self) -> bool { ++ self.0 == other.0 ++ } ++ #[inline] ++ fn ne(&self, other: &Self) -> bool { ++ self.0 != other.0 ++ } ++ } ++ ++ impl Eq for $id {} ++ ++ impl PartialOrd<$id> for $id { ++ #[inline] ++ fn partial_cmp( ++ &self, other: &Self, ++ ) -> Option { ++ use crate::cmp::Ordering; ++ if self == other { ++ Some(Ordering::Equal) ++ } else if self.0 > other.0 { ++ // Note: ++ // * false = 0_i ++ // * true == !0_i == -1_i ++ Some(Ordering::Less) ++ } else { ++ Some(Ordering::Greater) ++ } ++ } ++ ++ #[inline] ++ fn lt(&self, other: &Self) -> bool { ++ self.0 > other.0 ++ } ++ #[inline] ++ fn gt(&self, other: &Self) -> bool { ++ self.0 < other.0 ++ } ++ #[inline] ++ fn le(&self, other: &Self) -> bool { ++ self.0 >= other.0 ++ } ++ #[inline] ++ fn ge(&self, other: &Self) -> bool { ++ 
self.0 <= other.0 ++ } ++ } ++ ++ impl Ord for $id { ++ #[inline] ++ fn cmp(&self, other: &Self) -> crate::cmp::Ordering { ++ match self.partial_cmp(other) { ++ Some(x) => x, ++ None => unsafe { crate::hint::unreachable_unchecked() }, ++ } ++ } ++ } ++ ++ impl crate::hash::Hash for $id { ++ #[inline] ++ fn hash(&self, state: &mut H) { ++ (self.0 != 0).hash(state); ++ } ++ } ++ ++ impl crate::fmt::Debug for $id { ++ #[inline] ++ fn fmt( ++ &self, fmtter: &mut crate::fmt::Formatter<'_>, ++ ) -> Result<(), crate::fmt::Error> { ++ write!(fmtter, "{}({})", stringify!($id), self.0 != 0) ++ } ++ } ++ }; ++} ++ ++impl_mask_ty!(m8: i8 | /// 8-bit wide mask. ++); ++impl_mask_ty!(m16: i16 | /// 16-bit wide mask. ++); ++impl_mask_ty!(m32: i32 | /// 32-bit wide mask. ++); ++impl_mask_ty!(m64: i64 | /// 64-bit wide mask. ++); ++impl_mask_ty!(m128: i128 | /// 128-bit wide mask. ++); ++impl_mask_ty!(msize: isize | /// isize-wide mask. ++); +diff --git a/third_party/rust/packed_simd/src/sealed.rs b/third_party/rust/packed_simd/src/sealed.rs +new file mode 100644 +index 000000000000..832acd3f1d54 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/sealed.rs +@@ -0,0 +1,41 @@ ++//! Sealed traits ++ ++/// Trait implemented by arrays that can be SIMD types. ++#[doc(hidden)] ++pub trait SimdArray { ++ /// The type of the #[repr(simd)] type. ++ type Tuple: Copy + Clone; ++ /// The element type of the vector. ++ type T; ++ /// The number of elements in the array. ++ const N: usize; ++ /// The type: `[u32; Self::N]`. ++ type NT; ++} ++ ++/// This traits is used to constraint the arguments ++/// and result type of the portable shuffles. ++#[doc(hidden)] ++pub trait Shuffle { ++ // Lanes is a `[u32; N]` where `N` is the number of vector lanes ++ ++ /// The result type of the shuffle. ++ type Output; ++} ++ ++/// This trait is implemented by all SIMD vector types. 
++#[doc(hidden)] ++pub trait Simd { ++ /// Element type of the SIMD vector ++ type Element; ++ /// The number of elements in the SIMD vector. ++ const LANES: usize; ++ /// The type: `[u32; Self::N]`. ++ type LanesType; ++} ++ ++/// This trait is implemented by all mask types ++#[doc(hidden)] ++pub trait Mask { ++ fn test(&self) -> bool; ++} +diff --git a/third_party/rust/packed_simd/src/testing.rs b/third_party/rust/packed_simd/src/testing.rs +new file mode 100644 +index 000000000000..fcbcf9e2ac8e +--- /dev/null ++++ b/third_party/rust/packed_simd/src/testing.rs +@@ -0,0 +1,8 @@ ++//! Testing macros and other utilities. ++ ++#[macro_use] ++mod macros; ++ ++#[cfg(test)] ++#[macro_use] ++crate mod utils; +diff --git a/third_party/rust/packed_simd/src/testing/macros.rs b/third_party/rust/packed_simd/src/testing/macros.rs +new file mode 100644 +index 000000000000..6008634c76ce +--- /dev/null ++++ b/third_party/rust/packed_simd/src/testing/macros.rs +@@ -0,0 +1,44 @@ ++//! Testing macros ++ ++macro_rules! test_if { ++ ($cfg_tt:tt: $it:item) => { ++ #[cfg(any( ++ // Test everything if: ++ // ++ // * tests are enabled, ++ // * no features about exclusively testing ++ // specific vector classes are enabled ++ all(test, not(any( ++ test_v16, ++ test_v32, ++ test_v64, ++ test_v128, ++ test_v256, ++ test_v512, ++ test_none, // disables all tests ++ ))), ++ // Test if: ++ // ++ // * tests are enabled ++ // * a particular cfg token tree returns true ++ all(test, $cfg_tt), ++ ))] ++ $it ++ }; ++} ++ ++#[cfg(test)] ++#[allow(unused)] ++macro_rules! ref_ { ++ ($anything:tt) => { ++ &$anything ++ }; ++} ++ ++#[cfg(test)] ++#[allow(unused)] ++macro_rules! ref_mut_ { ++ ($anything:tt) => { ++ &mut $anything ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/testing/utils.rs b/third_party/rust/packed_simd/src/testing/utils.rs +new file mode 100644 +index 000000000000..7b8f21ac1c55 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/testing/utils.rs +@@ -0,0 +1,135 @@ ++//! 
Testing utilities ++ ++#![allow(dead_code)] ++ ++use crate::{cmp::PartialOrd, fmt::Debug, LexicographicallyOrdered}; ++ ++/// Tests PartialOrd for `a` and `b` where `a < b` is true. ++pub fn test_lt( ++ a: LexicographicallyOrdered, b: LexicographicallyOrdered, ++) where ++ LexicographicallyOrdered: Debug + PartialOrd, ++{ ++ assert!(a < b, "{:?}, {:?}", a, b); ++ assert!(b > a, "{:?}, {:?}", a, b); ++ ++ assert!(!(a == b), "{:?}, {:?}", a, b); ++ assert!(a != b, "{:?}, {:?}", a, b); ++ ++ assert!(a <= b, "{:?}, {:?}", a, b); ++ assert!(b >= a, "{:?}, {:?}", a, b); ++ ++ // Irreflexivity ++ assert!(!(a < a), "{:?}, {:?}", a, b); ++ assert!(!(b < b), "{:?}, {:?}", a, b); ++ assert!(!(a > a), "{:?}, {:?}", a, b); ++ assert!(!(b > b), "{:?}, {:?}", a, b); ++ ++ assert!(a <= a, "{:?}, {:?}", a, b); ++ assert!(b <= b, "{:?}, {:?}", a, b); ++} ++ ++/// Tests PartialOrd for `a` and `b` where `a <= b` is true. ++pub fn test_le( ++ a: LexicographicallyOrdered, b: LexicographicallyOrdered, ++) where ++ LexicographicallyOrdered: Debug + PartialOrd, ++{ ++ assert!(a <= b, "{:?}, {:?}", a, b); ++ assert!(b >= a, "{:?}, {:?}", a, b); ++ ++ assert!(a == b || a < b, "{:?}, {:?}", a, b); ++ assert!(a == b || b > a, "{:?}, {:?}", a, b); ++ ++ if a == b { ++ assert!(!(a < b), "{:?}, {:?}", a, b); ++ assert!(!(b > a), "{:?}, {:?}", a, b); ++ ++ assert!(!(a != b), "{:?}, {:?}", a, b); ++ } else { ++ assert!(a != b, "{:?}, {:?}", a, b); ++ test_lt(a, b); ++ } ++} ++ ++/// Test PartialOrd::partial_cmp for `a` and `b` returning `Ordering` ++pub fn test_cmp( ++ a: LexicographicallyOrdered, b: LexicographicallyOrdered, ++ o: Option, ++) where ++ LexicographicallyOrdered: PartialOrd + Debug, ++ T: Debug + crate::sealed::Simd + Copy + Clone, ++ ::Element: Default + Copy + Clone + PartialOrd, ++{ ++ assert!(T::LANES <= 64, "array length in these two arrays needs updating"); ++ let mut arr_a: [T::Element; 64] = [Default::default(); 64]; ++ let mut arr_b: [T::Element; 64] = [Default::default(); 
64]; ++ ++ unsafe { ++ crate::ptr::write_unaligned( ++ arr_a.as_mut_ptr() as *mut LexicographicallyOrdered, ++ a, ++ ) ++ } ++ unsafe { ++ crate::ptr::write_unaligned( ++ arr_b.as_mut_ptr() as *mut LexicographicallyOrdered, ++ b, ++ ) ++ } ++ let expected = arr_a[0..T::LANES].partial_cmp(&arr_b[0..T::LANES]); ++ let result = a.partial_cmp(&b); ++ assert_eq!(expected, result, "{:?}, {:?}", a, b); ++ assert_eq!(o, result, "{:?}, {:?}", a, b); ++ match o { ++ Some(crate::cmp::Ordering::Less) => { ++ test_lt(a, b); ++ test_le(a, b); ++ } ++ Some(crate::cmp::Ordering::Greater) => { ++ test_lt(b, a); ++ test_le(b, a); ++ } ++ Some(crate::cmp::Ordering::Equal) => { ++ assert!(a == b, "{:?}, {:?}", a, b); ++ assert!(!(a != b), "{:?}, {:?}", a, b); ++ assert!(!(a < b), "{:?}, {:?}", a, b); ++ assert!(!(b < a), "{:?}, {:?}", a, b); ++ assert!(!(a > b), "{:?}, {:?}", a, b); ++ assert!(!(b > a), "{:?}, {:?}", a, b); ++ ++ test_le(a, b); ++ test_le(b, a); ++ } ++ None => { ++ assert!(!(a == b), "{:?}, {:?}", a, b); ++ assert!(!(a != b), "{:?}, {:?}", a, b); ++ assert!(!(a < b), "{:?}, {:?}", a, b); ++ assert!(!(a > b), "{:?}, {:?}", a, b); ++ assert!(!(b < a), "{:?}, {:?}", a, b); ++ assert!(!(b > a), "{:?}, {:?}", a, b); ++ assert!(!(a <= b), "{:?}, {:?}", a, b); ++ assert!(!(b <= a), "{:?}, {:?}", a, b); ++ assert!(!(a >= b), "{:?}, {:?}", a, b); ++ assert!(!(b >= a), "{:?}, {:?}", a, b); ++ } ++ } ++} ++ ++// Returns a tuple containing two distinct pointer values of the same type as ++// the element type of the Simd vector `$id`. ++#[allow(unused)] ++macro_rules! 
ptr_vals { ++ ($id:ty) => { ++ // expands to an expression ++ #[allow(unused_unsafe)] ++ unsafe { ++ // all bits cleared ++ let clear: <$id as sealed::Simd>::Element = crate::mem::zeroed(); ++ // all bits set ++ let set: <$id as sealed::Simd>::Element = ++ crate::mem::transmute(-1_isize); ++ (clear, set) ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/v128.rs b/third_party/rust/packed_simd/src/v128.rs +new file mode 100644 +index 000000000000..1d0282dc4278 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/v128.rs +@@ -0,0 +1,80 @@ ++//! 128-bit wide vector types ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_i!([i8; 16]: i8x16, m8x16 | i8 | test_v128 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: | ++ /// A 128-bit vector with 16 `i8` lanes. ++); ++impl_u!([u8; 16]: u8x16, m8x16 | u8 | test_v128 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: | ++ /// A 128-bit vector with 16 `u8` lanes. ++); ++impl_m!([m8; 16]: m8x16 | i8 | test_v128 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: m16x16 | ++ /// A 128-bit vector mask with 16 `m8` lanes. ++); ++ ++impl_i!([i16; 8]: i16x8, m16x8 | i16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: i8x8, u8x8 | ++ /// A 128-bit vector with 8 `i16` lanes. ++); ++impl_u!([u16; 8]: u16x8, m16x8 | u16| test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: u8x8 | ++ /// A 128-bit vector with 8 `u16` lanes. ++); ++impl_m!([m16; 8]: m16x8 | i16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: m8x8, m32x8 | ++ /// A 128-bit vector mask with 8 `m16` lanes. ++); ++ ++impl_i!([i32; 4]: i32x4, m32x4 | i32 | test_v128 | x0, x1, x2, x3 | ++ From: i8x4, u8x4, i16x4, u16x4 | ++ /// A 128-bit vector with 4 `i32` lanes. ++); ++impl_u!([u32; 4]: u32x4, m32x4 | u32| test_v128 | x0, x1, x2, x3 | ++ From: u8x4, u16x4 | ++ /// A 128-bit vector with 4 `u32` lanes. 
++); ++impl_f!([f32; 4]: f32x4, m32x4 | f32 | test_v128 | x0, x1, x2, x3 | ++ From: i8x4, u8x4, i16x4, u16x4 | ++ /// A 128-bit vector with 4 `f32` lanes. ++); ++impl_m!([m32; 4]: m32x4 | i32 | test_v128 | x0, x1, x2, x3 | ++ From: m8x4, m16x4, m64x4 | ++ /// A 128-bit vector mask with 4 `m32` lanes. ++); ++ ++impl_i!([i64; 2]: i64x2, m64x2 | i64 | test_v128 | x0, x1 | ++ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2 | ++ /// A 128-bit vector with 2 `i64` lanes. ++); ++impl_u!([u64; 2]: u64x2, m64x2 | u64 | test_v128 | x0, x1 | ++ From: u8x2, u16x2, u32x2 | ++ /// A 128-bit vector with 2 `u64` lanes. ++); ++impl_f!([f64; 2]: f64x2, m64x2 | f64 | test_v128 | x0, x1 | ++ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, f32x2 | ++ /// A 128-bit vector with 2 `f64` lanes. ++); ++impl_m!([m64; 2]: m64x2 | i64 | test_v128 | x0, x1 | ++ From: m8x2, m16x2, m32x2, m128x2 | ++ /// A 128-bit vector mask with 2 `m64` lanes. ++); ++ ++impl_i!([i128; 1]: i128x1, m128x1 | i128 | test_v128 | x0 | ++ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, i64x1, u64x1 */ | // FIXME: unary small vector types ++ /// A 128-bit vector with 1 `i128` lane. ++); ++impl_u!([u128; 1]: u128x1, m128x1 | u128 | test_v128 | x0 | ++ From: /*u8x1, u16x1, u32x1, u64x1 */ | // FIXME: unary small vector types ++ /// A 128-bit vector with 1 `u128` lane. ++); ++impl_m!([m128; 1]: m128x1 | i128 | test_v128 | x0 | ++ From: /*m8x1, m16x1, m32x1, m64x1 */ | // FIXME: unary small vector types ++ /// A 128-bit vector mask with 1 `m128` lane. ++); +diff --git a/third_party/rust/packed_simd/src/v16.rs b/third_party/rust/packed_simd/src/v16.rs +new file mode 100644 +index 000000000000..67a3832d2530 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/v16.rs +@@ -0,0 +1,16 @@ ++//! 16-bit wide vector types ++ ++use crate::*; ++ ++impl_i!([i8; 2]: i8x2, m8x2 | i8 | test_v16 | x0, x1 | ++ From: | ++ /// A 16-bit vector with 2 `i8` lanes. 
++); ++impl_u!([u8; 2]: u8x2, m8x2 | u8 | test_v16 | x0, x1 | ++ From: | ++ /// A 16-bit vector with 2 `u8` lanes. ++); ++impl_m!([m8; 2]: m8x2 | i8 | test_v16 | x0, x1 | ++ From: m16x2, m32x2, m64x2, m128x2 | ++ /// A 16-bit vector mask with 2 `m8` lanes. ++); +diff --git a/third_party/rust/packed_simd/src/v256.rs b/third_party/rust/packed_simd/src/v256.rs +new file mode 100644 +index 000000000000..6b59336f68b6 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/v256.rs +@@ -0,0 +1,86 @@ ++//! 256-bit wide vector types ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_i!([i8; 32]: i8x32, m8x32 | i8 | test_v256 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | ++ From: | ++ /// A 256-bit vector with 32 `i8` lanes. ++); ++impl_u!([u8; 32]: u8x32, m8x32 | u8 | test_v256 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | ++ From: | ++ /// A 256-bit vector with 32 `u8` lanes. ++); ++impl_m!([m8; 32]: m8x32 | i8 | test_v256 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | ++ From: | ++ /// A 256-bit vector mask with 32 `m8` lanes. ++); ++ ++impl_i!([i16; 16]: i16x16, m16x16 | i16 | test_v256 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: i8x16, u8x16 | ++ /// A 256-bit vector with 16 `i16` lanes. ++); ++impl_u!([u16; 16]: u16x16, m16x16 | u16 | test_v256 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: u8x16 | ++ /// A 256-bit vector with 16 `u16` lanes. ++); ++impl_m!([m16; 16]: m16x16 | i16 | test_v256 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: m8x16 | ++ /// A 256-bit vector mask with 16 `m16` lanes. 
++); ++ ++impl_i!([i32; 8]: i32x8, m32x8 | i32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: i8x8, u8x8, i16x8, u16x8 | ++ /// A 256-bit vector with 8 `i32` lanes. ++); ++impl_u!([u32; 8]: u32x8, m32x8 | u32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: u8x8, u16x8 | ++ /// A 256-bit vector with 8 `u32` lanes. ++); ++impl_f!([f32; 8]: f32x8, m32x8 | f32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: i8x8, u8x8, i16x8, u16x8 | ++ /// A 256-bit vector with 8 `f32` lanes. ++); ++impl_m!([m32; 8]: m32x8 | i32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: m8x8, m16x8 | ++ /// A 256-bit vector mask with 8 `m32` lanes. ++); ++ ++impl_i!([i64; 4]: i64x4, m64x4 | i64 | test_v256 | x0, x1, x2, x3 | ++ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4 | ++ /// A 256-bit vector with 4 `i64` lanes. ++); ++impl_u!([u64; 4]: u64x4, m64x4 | u64 | test_v256 | x0, x1, x2, x3 | ++ From: u8x4, u16x4, u32x4 | ++ /// A 256-bit vector with 4 `u64` lanes. ++); ++impl_f!([f64; 4]: f64x4, m64x4 | f64 | test_v256 | x0, x1, x2, x3 | ++ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, f32x4 | ++ /// A 256-bit vector with 4 `f64` lanes. ++); ++impl_m!([m64; 4]: m64x4 | i64 | test_v256 | x0, x1, x2, x3 | ++ From: m8x4, m16x4, m32x4 | ++ /// A 256-bit vector mask with 4 `m64` lanes. ++); ++ ++impl_i!([i128; 2]: i128x2, m128x2 | i128 | test_v256 | x0, x1 | ++ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, i64x2, u64x2 | ++ /// A 256-bit vector with 2 `i128` lanes. ++); ++impl_u!([u128; 2]: u128x2, m128x2 | u128 | test_v256 | x0, x1 | ++ From: u8x2, u16x2, u32x2, u64x2 | ++ /// A 256-bit vector with 2 `u128` lanes. ++); ++impl_m!([m128; 2]: m128x2 | i128 | test_v256 | x0, x1 | ++ From: m8x2, m16x2, m32x2, m64x2 | ++ /// A 256-bit vector mask with 2 `m128` lanes. 
++); +diff --git a/third_party/rust/packed_simd/src/v32.rs b/third_party/rust/packed_simd/src/v32.rs +new file mode 100644 +index 000000000000..09cef9bdd472 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/v32.rs +@@ -0,0 +1,29 @@ ++//! 32-bit wide vector types ++ ++use crate::*; ++ ++impl_i!([i8; 4]: i8x4, m8x4 | i8 | test_v32 | x0, x1, x2, x3 | ++ From: | ++ /// A 32-bit vector with 4 `i8` lanes. ++); ++impl_u!([u8; 4]: u8x4, m8x4 | u8 | test_v32 | x0, x1, x2, x3 | ++ From: | ++ /// A 32-bit vector with 4 `u8` lanes. ++); ++impl_m!([m8; 4]: m8x4 | i8 | test_v32 | x0, x1, x2, x3 | ++ From: m16x4, m32x4, m64x4 | ++ /// A 32-bit vector mask with 4 `m8` lanes. ++); ++ ++impl_i!([i16; 2]: i16x2, m16x2 | i16 | test_v32 | x0, x1 | ++ From: i8x2, u8x2 | ++ /// A 32-bit vector with 2 `i16` lanes. ++); ++impl_u!([u16; 2]: u16x2, m16x2 | u16 | test_v32 | x0, x1 | ++ From: u8x2 | ++ /// A 32-bit vector with 2 `u16` lanes. ++); ++impl_m!([m16; 2]: m16x2 | i16 | test_v32 | x0, x1 | ++ From: m8x2, m32x2, m64x2, m128x2 | ++ /// A 32-bit vector mask with 2 `m16` lanes. ++); +diff --git a/third_party/rust/packed_simd/src/v512.rs b/third_party/rust/packed_simd/src/v512.rs +new file mode 100644 +index 000000000000..b1714aded369 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/v512.rs +@@ -0,0 +1,99 @@ ++//! 512-bit wide vector types ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_i!([i8; 64]: i8x64, m8x64 | i8 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, ++ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, ++ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | ++ From: | ++ /// A 512-bit vector with 64 `i8` lanes. 
++); ++impl_u!([u8; 64]: u8x64, m8x64 | u8 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, ++ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, ++ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | ++ From: | ++ /// A 512-bit vector with 64 `u8` lanes. ++); ++impl_m!([m8; 64]: m8x64 | i8 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, ++ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, ++ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | ++ From: | ++ /// A 512-bit vector mask with 64 `m8` lanes. ++); ++ ++impl_i!([i16; 32]: i16x32, m16x32 | i16 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | ++ From: i8x32, u8x32 | ++ /// A 512-bit vector with 32 `i16` lanes. ++); ++impl_u!([u16; 32]: u16x32, m16x32 | u16 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | ++ From: u8x32 | ++ /// A 512-bit vector with 32 `u16` lanes. ++); ++impl_m!([m16; 32]: m16x32 | i16 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | ++ From: m8x32 | ++ /// A 512-bit vector mask with 32 `m16` lanes. ++); ++ ++impl_i!([i32; 16]: i32x16, m32x16 | i32 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: i8x16, u8x16, i16x16, u16x16 | ++ /// A 512-bit vector with 16 `i32` lanes. 
++); ++impl_u!([u32; 16]: u32x16, m32x16 | u32 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: u8x16, u16x16 | ++ /// A 512-bit vector with 16 `u32` lanes. ++); ++impl_f!([f32; 16]: f32x16, m32x16 | f32 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: i8x16, u8x16, i16x16, u16x16 | ++ /// A 512-bit vector with 16 `f32` lanes. ++); ++impl_m!([m32; 16]: m32x16 | i32 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: m8x16, m16x16 | ++ /// A 512-bit vector mask with 16 `m32` lanes. ++); ++ ++impl_i!([i64; 8]: i64x8, m64x8 | i64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8 | ++ /// A 512-bit vector with 8 `i64` lanes. ++); ++impl_u!([u64; 8]: u64x8, m64x8 | u64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: u8x8, u16x8, u32x8 | ++ /// A 512-bit vector with 8 `u64` lanes. ++); ++impl_f!([f64; 8]: f64x8, m64x8 | f64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8, f32x8 | ++ /// A 512-bit vector with 8 `f64` lanes. ++); ++impl_m!([m64; 8]: m64x8 | i64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: m8x8, m16x8, m32x8 | ++ /// A 512-bit vector mask with 8 `m64` lanes. ++); ++ ++impl_i!([i128; 4]: i128x4, m128x4 | i128 | test_v512 | x0, x1, x2, x3 | ++ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, i64x4, u64x4 | ++ /// A 512-bit vector with 4 `i128` lanes. ++); ++impl_u!([u128; 4]: u128x4, m128x4 | u128 | test_v512 | x0, x1, x2, x3 | ++ From: u8x4, u16x4, u32x4, u64x4 | ++ /// A 512-bit vector with 4 `u128` lanes. ++); ++impl_m!([m128; 4]: m128x4 | i128 | test_v512 | x0, x1, x2, x3 | ++ From: m8x4, m16x4, m32x4, m64x4 | ++ /// A 512-bit vector mask with 4 `m128` lanes. 
++); +diff --git a/third_party/rust/packed_simd/src/v64.rs b/third_party/rust/packed_simd/src/v64.rs +new file mode 100644 +index 000000000000..1ee6219c040b +--- /dev/null ++++ b/third_party/rust/packed_simd/src/v64.rs +@@ -0,0 +1,66 @@ ++//! 64-bit wide vector types ++#![rustfmt::skip] ++ ++use super::*; ++ ++impl_i!([i8; 8]: i8x8, m8x8 | i8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: | ++ /// A 64-bit vector with 8 `i8` lanes. ++); ++impl_u!([u8; 8]: u8x8, m8x8 | u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: | ++ /// A 64-bit vector with 8 `u8` lanes. ++); ++impl_m!([m8; 8]: m8x8 | i8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: m16x8, m32x8 | ++ /// A 64-bit vector mask with 8 `m8` lanes. ++); ++ ++impl_i!([i16; 4]: i16x4, m16x4 | i16 | test_v64 | x0, x1, x2, x3 | ++ From: i8x4, u8x4 | ++ /// A 64-bit vector with 4 `i16` lanes. ++); ++impl_u!([u16; 4]: u16x4, m16x4 | u16 | test_v64 | x0, x1, x2, x3 | ++ From: u8x4 | ++ /// A 64-bit vector with 4 `u16` lanes. ++); ++impl_m!([m16; 4]: m16x4 | i16 | test_v64 | x0, x1, x2, x3 | ++ From: m8x4, m32x4, m64x4 | ++ /// A 64-bit vector mask with 4 `m16` lanes. ++); ++ ++impl_i!([i32; 2]: i32x2, m32x2 | i32 | test_v64 | x0, x1 | ++ From: i8x2, u8x2, i16x2, u16x2 | ++ /// A 64-bit vector with 2 `i32` lanes. ++); ++impl_u!([u32; 2]: u32x2, m32x2 | u32 | test_v64 | x0, x1 | ++ From: u8x2, u16x2 | ++ /// A 64-bit vector with 2 `u32` lanes. ++); ++impl_m!([m32; 2]: m32x2 | i32 | test_v64 | x0, x1 | ++ From: m8x2, m16x2, m64x2, m128x2 | ++ /// A 64-bit vector mask with 2 `m32` lanes. ++); ++impl_f!([f32; 2]: f32x2, m32x2 | f32 | test_v64 | x0, x1 | ++ From: i8x2, u8x2, i16x2, u16x2 | ++ /// A 64-bit vector with 2 `f32` lanes. ++); ++ ++/* ++impl_i!([i64; 1]: i64x1, m64x1 | i64 | test_v64 | x0 | ++ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1*/ | // FIXME: primitive to vector conversion ++ /// A 64-bit vector with 1 `i64` lanes. 
++); ++impl_u!([u64; 1]: u64x1, m64x1 | u64 | test_v64 | x0 | ++ From: /*u8x1, u16x1, u32x1*/ | // FIXME: primitive to vector conversion ++ /// A 64-bit vector with 1 `u64` lanes. ++); ++impl_m!([m64; 1]: m64x1 | i64 | test_v64 | x0 | ++ From: /*m8x1, m16x1, m32x1, */ m128x1 | // FIXME: unary small vector types ++ /// A 64-bit vector mask with 1 `m64` lanes. ++); ++impl_f!([f64; 1]: f64x1, m64x1 | f64 | test_v64 | x0 | ++ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, f32x1*/ | // FIXME: unary small vector types ++ /// A 64-bit vector with 1 `f64` lanes. ++); ++*/ +diff --git a/third_party/rust/packed_simd/src/vPtr.rs b/third_party/rust/packed_simd/src/vPtr.rs +new file mode 100644 +index 000000000000..fe9fb28ffa89 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/vPtr.rs +@@ -0,0 +1,34 @@ ++//! Vectors of pointers ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_const_p!( ++ [*const T; 2]: cptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: | ++ /// A vector with 2 `*const T` lanes ++); ++ ++impl_mut_p!( ++ [*mut T; 2]: mptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: | ++ /// A vector with 2 `*mut T` lanes ++); ++ ++impl_const_p!( ++ [*const T; 4]: cptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: | ++ /// A vector with 4 `*const T` lanes ++); ++ ++impl_mut_p!( ++ [*mut T; 4]: mptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: | ++ /// A vector with 4 `*mut T` lanes ++); ++ ++impl_const_p!( ++ [*const T; 8]: cptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | ++ /// A vector with 8 `*const T` lanes ++); ++ ++impl_mut_p!( ++ [*mut T; 8]: mptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | ++ /// A vector with 8 `*mut T` lanes ++); +diff --git a/third_party/rust/packed_simd/src/vSize.rs b/third_party/rust/packed_simd/src/vSize.rs +new file mode 100644 +index 000000000000..5594323372b4 +--- /dev/null ++++ 
b/third_party/rust/packed_simd/src/vSize.rs +@@ -0,0 +1,53 @@ ++//! Vectors with pointer-sized elements ++ ++use crate::codegen::pointer_sized_int::{isize_, usize_}; ++use crate::*; ++ ++impl_i!([isize; 2]: isizex2, msizex2 | isize_ | test_v128 | ++ x0, x1| ++ From: | ++ /// A vector with 2 `isize` lanes. ++); ++ ++impl_u!([usize; 2]: usizex2, msizex2 | usize_ | test_v128 | ++ x0, x1| ++ From: | ++ /// A vector with 2 `usize` lanes. ++); ++impl_m!([msize; 2]: msizex2 | isize_ | test_v128 | ++ x0, x1 | ++ From: | ++ /// A vector mask with 2 `msize` lanes. ++); ++ ++impl_i!([isize; 4]: isizex4, msizex4 | isize_ | test_v256 | ++ x0, x1, x2, x3 | ++ From: | ++ /// A vector with 4 `isize` lanes. ++); ++impl_u!([usize; 4]: usizex4, msizex4 | usize_ | test_v256 | ++ x0, x1, x2, x3| ++ From: | ++ /// A vector with 4 `usize` lanes. ++); ++impl_m!([msize; 4]: msizex4 | isize_ | test_v256 | ++ x0, x1, x2, x3 | ++ From: | ++ /// A vector mask with 4 `msize` lanes. ++); ++ ++impl_i!([isize; 8]: isizex8, msizex8 | isize_ | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: | ++ /// A vector with 4 `isize` lanes. ++); ++impl_u!([usize; 8]: usizex8, msizex8 | usize_ | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: | ++ /// A vector with 8 `usize` lanes. ++); ++impl_m!([msize; 8]: msizex8 | isize_ | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: | ++ /// A vector mask with 8 `msize` lanes. 
++); +diff --git a/third_party/rust/packed_simd/tests/endianness.rs b/third_party/rust/packed_simd/tests/endianness.rs +new file mode 100644 +index 000000000000..1e6b4f354301 +--- /dev/null ++++ b/third_party/rust/packed_simd/tests/endianness.rs +@@ -0,0 +1,262 @@ ++#[cfg(target_arch = "wasm32")] ++use wasm_bindgen_test::*; ++ ++use packed_simd::*; ++use std::{mem, slice}; ++ ++#[cfg_attr(not(target_arch = "wasm32"), test)] ++#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++fn endian_indexing() { ++ let v = i32x4::new(0, 1, 2, 3); ++ assert_eq!(v.extract(0), 0); ++ assert_eq!(v.extract(1), 1); ++ assert_eq!(v.extract(2), 2); ++ assert_eq!(v.extract(3), 3); ++} ++ ++#[cfg_attr(not(target_arch = "wasm32"), test)] ++#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++fn endian_bitcasts() { ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let t: i16x8 = unsafe { mem::transmute(x) }; ++ let e: i16x8 = if cfg!(target_endian = "little") { ++ i16x8::new(256, 770, 1284, 1798, 2312, 2826, 3340, 3854) ++ } else { ++ i16x8::new(1, 515, 1029, 1543, 2057, 2571, 3085, 3599) ++ }; ++ assert_eq!(t, e); ++} ++ ++#[cfg_attr(not(target_arch = "wasm32"), test)] ++#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++fn endian_casts() { ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let t: i16x16 = x.into(); // simd_cast ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let e = i16x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ assert_eq!(t, e); ++} ++ ++#[cfg_attr(not(target_arch = "wasm32"), test)] ++#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++fn endian_load_and_stores() { ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let mut y: [i16; 8] = [0; 8]; ++ x.write_to_slice_unaligned(unsafe { ++ 
slice::from_raw_parts_mut(&mut y as *mut _ as *mut i8, 16) ++ }); ++ ++ let e: [i16; 8] = if cfg!(target_endian = "little") { ++ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] ++ } else { ++ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] ++ }; ++ assert_eq!(y, e); ++ ++ let z = i8x16::from_slice_unaligned(unsafe { ++ slice::from_raw_parts(&y as *const _ as *const i8, 16) ++ }); ++ assert_eq!(z, x); ++} ++ ++#[cfg_attr(not(target_arch = "wasm32"), test)] ++#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++fn endian_array_union() { ++ union A { ++ data: [f32; 4], ++ vec: f32x4, ++ } ++ let x: [f32; 4] = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data }; ++ assert_eq!(x[0], 0_f32); ++ assert_eq!(x[1], 1_f32); ++ assert_eq!(x[2], 2_f32); ++ assert_eq!(x[3], 3_f32); ++ let y: f32x4 = unsafe { A { data: [3., 2., 1., 0.] }.vec }; ++ assert_eq!(y, f32x4::new(3., 2., 1., 0.)); ++ ++ union B { ++ data: [i8; 16], ++ vec: i8x16, ++ } ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let x: [i8; 16] = unsafe { B { vec: x }.data }; ++ ++ for i in 0..16 { ++ assert_eq!(x[i], i as i8); ++ } ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let y = [ ++ 15, 14, 13, 12, 11, 19, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ]; ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let e = i8x16::new( ++ 15, 14, 13, 12, 11, 19, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ); ++ let z = unsafe { B { data: y }.vec }; ++ assert_eq!(z, e); ++ ++ union C { ++ data: [i16; 8], ++ vec: i8x16, ++ } ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let x: [i16; 8] = unsafe { C { vec: x }.data }; ++ ++ let e: [i16; 8] = if cfg!(target_endian = "little") { ++ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] ++ } else { ++ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] ++ }; ++ assert_eq!(x, e); ++} ++ ++#[cfg_attr(not(target_arch = "wasm32"), test)] ++#[cfg_attr(target_arch 
= "wasm32", wasm_bindgen_test)] ++fn endian_tuple_access() { ++ type F32x4T = (f32, f32, f32, f32); ++ union A { ++ data: F32x4T, ++ vec: f32x4, ++ } ++ let x: F32x4T = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data }; ++ assert_eq!(x.0, 0_f32); ++ assert_eq!(x.1, 1_f32); ++ assert_eq!(x.2, 2_f32); ++ assert_eq!(x.3, 3_f32); ++ let y: f32x4 = unsafe { A { data: (3., 2., 1., 0.) }.vec }; ++ assert_eq!(y, f32x4::new(3., 2., 1., 0.)); ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ type I8x16T = (i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8); ++ union B { ++ data: I8x16T, ++ vec: i8x16, ++ } ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let x: I8x16T = unsafe { B { vec: x }.data }; ++ ++ assert_eq!(x.0, 0); ++ assert_eq!(x.1, 1); ++ assert_eq!(x.2, 2); ++ assert_eq!(x.3, 3); ++ assert_eq!(x.4, 4); ++ assert_eq!(x.5, 5); ++ assert_eq!(x.6, 6); ++ assert_eq!(x.7, 7); ++ assert_eq!(x.8, 8); ++ assert_eq!(x.9, 9); ++ assert_eq!(x.10, 10); ++ assert_eq!(x.11, 11); ++ assert_eq!(x.12, 12); ++ assert_eq!(x.13, 13); ++ assert_eq!(x.14, 14); ++ assert_eq!(x.15, 15); ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let y = ( ++ 15, 14, 13, 12, 11, 10, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ); ++ let z: i8x16 = unsafe { B { data: y }.vec }; ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let e = i8x16::new( ++ 15, 14, 13, 12, 11, 10, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ); ++ assert_eq!(e, z); ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ type I16x8T = (i16, i16, i16, i16, i16, i16, i16, i16); ++ union C { ++ data: I16x8T, ++ vec: i8x16, ++ } ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let x: I16x8T = unsafe { C { vec: x }.data }; ++ ++ let e: [i16; 8] = if cfg!(target_endian = "little") { ++ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] ++ } else { ++ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] ++ }; ++ 
assert_eq!(x.0, e[0]); ++ assert_eq!(x.1, e[1]); ++ assert_eq!(x.2, e[2]); ++ assert_eq!(x.3, e[3]); ++ assert_eq!(x.4, e[4]); ++ assert_eq!(x.5, e[5]); ++ assert_eq!(x.6, e[6]); ++ assert_eq!(x.7, e[7]); ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ #[repr(C)] ++ #[derive(Copy ,Clone)] ++ pub struct Tup(pub i8, pub i8, pub i16, pub i8, pub i8, pub i16, ++ pub i8, pub i8, pub i16, pub i8, pub i8, pub i16); ++ ++ union D { ++ data: Tup, ++ vec: i8x16, ++ } ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let x: Tup = unsafe { D { vec: x }.data }; ++ ++ let e: [i16; 12] = if cfg!(target_endian = "little") { ++ [0, 1, 770, 4, 5, 1798, 8, 9, 2826, 12, 13, 3854] ++ } else { ++ [0, 1, 515, 4, 5, 1543, 8, 9, 2571, 12, 13, 3599] ++ }; ++ assert_eq!(x.0 as i16, e[0]); ++ assert_eq!(x.1 as i16, e[1]); ++ assert_eq!(x.2 as i16, e[2]); ++ assert_eq!(x.3 as i16, e[3]); ++ assert_eq!(x.4 as i16, e[4]); ++ assert_eq!(x.5 as i16, e[5]); ++ assert_eq!(x.6 as i16, e[6]); ++ assert_eq!(x.7 as i16, e[7]); ++ assert_eq!(x.8 as i16, e[8]); ++ assert_eq!(x.9 as i16, e[9]); ++ assert_eq!(x.10 as i16, e[10]); ++ assert_eq!(x.11 as i16, e[11]); ++} +diff --git a/third_party/rust/simd/.cargo-checksum.json b/third_party/rust/simd/.cargo-checksum.json +deleted file mode 100644 +index 5e8c154cda72..000000000000 +--- a/third_party/rust/simd/.cargo-checksum.json ++++ /dev/null +@@ -1 +0,0 @@ 
+-{"files":{"Cargo.toml":"0c7a480c62d7b42604098fa1dd6453be79629112569c494efa75d7fd0998fd69","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6d3a9431e65e69c73a8923e6517b889d17549b23db406b9ec027710d16af701f","README.md":"f426ca32bb44fee39d83d51e481fe6b2640d4b78cb097c952cd75800b886f2fd","benches/mandelbrot.rs":"051b5199e66bca6cf7774e9024915fd4e1349ab39726a10a14e06b60d65d87a4","benches/matrix.rs":"048a21dacdb62365e0105d00d2c8cd6bd2396ac81134f2bff7eb4f7d095fb735","build.rs":"69c9c9029ca969a4bb3f11429bc1424fa75af46143eb0d853b4db3a512859b32","examples/axpy.rs":"4307626045d64ec08361c97c9c72c5dc8d361bdc88f64453b97ac0212041a1b2","examples/convert.rs":"8e658fde050f8a0d8b84ad7570446b10fcf544afbd551b940ca340474f324840","examples/dot-product.rs":"6fe2e007c147af5353804173a593c5b9d57dbccec156e1da37e9e32537363f91","examples/fannkuch-redux-nosimd.rs":"7b2fbde35e8666929d14d67328471cb0483d038a5325232f8db148b30865312b","examples/fannkuch-redux.rs":"ea21fdbd2274488a62cc984acad6e0b65d52f24fb4ff63b7057a3a667e9c8aae","examples/mandelbrot.rs":"71be242543c1e487145d7f16341c05d05d86109de4d9e94c5d6bc9a9c6ed9766","examples/matrix-inverse.rs":"93dbc55c66a72e5f7bc730072f35682523fa20dd362755d8443ad6982143cb5d","examples/nbody-nosimd.rs":"9cf46ea02e266c20f811318f1c5856d5afb9575b2d48d552fbd978f5c1856bdb","examples/nbody.rs":"a864311affab262024479d6348ff51af43d809e9ad332ec30ea4aacceaa2eae1","examples/ops.rs":"b08ea83583df71d0052895d677320a9888da5b6729c9b70636d31ede5128bb7f","examples/spectral-norm-nosimd.rs":"ffc8512ecde779078ea467f38f423a0ea623c63da7078193f9dd370200773f79","examples/spectral-norm.rs":"edb09c9d477f83939098cfb77a27cc298bc7a0c8a8e29cece0cccae0d70d890e","src/aarch64/mod.rs":"83f52775364c98de0cecb7e1509530c18972e932469f5f1522aa24a735d0fa37","src/aarch64/neon.rs":"3c05ea43b7261b9af9c0d904b37de01c2ba99caedcb464700f16617b672965a1","src/arm/mod.rs":"dcdd90bc0b39abaf86a0c8946d442b16313563fbae1ff03248628275c74d8617","src/arm/neon.rs":"71d0bb6dac
5f58599bb825449701a05cf32f6eca1918e80d060b746e69751c37","src/common.rs":"c5a7b937c5cd8c3bccf0fb20d5d77770c0d9b0dd9fa06a661c6f2ddf118e65c0","src/lib.rs":"a24a207e65468de2189297380747e2f2f33ec2317f4b83f0665d34b1c09feb08","src/sixty_four.rs":"d168776d02acf943bda8044b24e644b7a9584197a223eba1a7c3024b205dc87d","src/v256.rs":"34bfde3676e23f6925db5d0408ae838e3aab7706128fd7c33e855b8579c69318","src/x86/avx.rs":"efcf2120a904a89b0adf2d3d3bdd0ca17df2ec058410af23fb7e81915873f808","src/x86/avx2.rs":"3bcb3f391ad5f16f0a6da0bc1301329beb478ad6265bd3b2c9c124fc2e6198e5","src/x86/mod.rs":"0acc5a5e2672e2a0fddc11065663be8b8fa2da87320ea291fa86ff8c2f33edf5","src/x86/sse2.rs":"8807fb04bbfb404e17fcacf1e21d22616f8b377540a227b1fd03c121879122dd","src/x86/sse3.rs":"9bd01a4f08069ca4f445952e744d651efe887e3835b18872e757375f0d053bd2","src/x86/sse4_1.rs":"9ceb80dd70a7e7dfeef508cb935e1a2637175bc87a3b090f5dea691ff6aa0516","src/x86/sse4_2.rs":"c59321aed8decdce4d0d8570cff46aed02e1a8265647ef7702e9b180fc581254","src/x86/ssse3.rs":"2290f0269bae316b8e0491495645ee38a9bd73525c8572759c1328341c3bdb4c"},"package":"0048b17eb9577ac545c61d85c3559b41dfb4cbea41c9bd9ca6a4f73ff05fda84"} +\ No newline at end of file +diff --git a/third_party/rust/simd/Cargo.toml b/third_party/rust/simd/Cargo.toml +deleted file mode 100644 +index 30279b93556c..000000000000 +--- a/third_party/rust/simd/Cargo.toml ++++ /dev/null +@@ -1,37 +0,0 @@ +-# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +-# +-# When uploading crates to the registry Cargo will automatically +-# "normalize" Cargo.toml files for maximal compatibility +-# with all versions of Cargo and also rewrite `path` dependencies +-# to registry (e.g. crates.io) dependencies +-# +-# If you believe there's an error in this file please file an +-# issue against the rust-lang/cargo repository. 
If you're +-# editing this file be aware that the upstream Cargo.toml +-# will likely look very different (and much more reasonable) +- +-[package] +-name = "simd" +-version = "0.2.3" +-authors = ["Huon Wilson "] +-description = "`simd` offers limited cross-platform access to SIMD instructions on\nCPUs, as well as raw interfaces to platform-specific instructions.\n(To be obsoleted by the `std::simd` implementation RFC 2366.)\n" +-documentation = "https://docs.rs/simd/" +-readme = "README.md" +-keywords = ["simd", "data-parallel"] +-license = "MIT/Apache-2.0" +-repository = "https://github.com/hsivonen/simd" +-[package.metadata.docs.rs] +-features = ["doc"] +-[dependencies.serde] +-version = "1.0" +-optional = true +- +-[dependencies.serde_derive] +-version = "1.0" +-optional = true +-[dev-dependencies.cfg-if] +-version = "0.1" +- +-[features] +-doc = [] +-with-serde = ["serde", "serde_derive"] +diff --git a/third_party/rust/simd/README.md b/third_party/rust/simd/README.md +deleted file mode 100644 +index 1c34f49bcd91..000000000000 +--- a/third_party/rust/simd/README.md ++++ /dev/null +@@ -1,11 +0,0 @@ +-# `simd` +- +-[![Build Status](https://travis-ci.org/hsivonen/simd.svg?branch=master)](https://travis-ci.org/hsivonen/simd) +-[![crates.io](https://meritbadge.herokuapp.com/simd)](https://crates.io/crates/simd) +-[![docs.rs](https://docs.rs/simd/badge.svg)](https://docs.rs/simd/) +- +-`simd` offers a basic interface to the SIMD functionality of CPUs. (Note: This crate fails to build unless the target is aarch64, x86_64, i686 (i.e. SSE2 enabled; not i586) or an ARMv7 target (thumb or not) with NEON enabled.) +- +-This crate is expected to become _obsolete_ once the implementation of [RFC 2366](https://github.com/rust-lang/rfcs/pull/2366) lands in the standard library. 
+- +-[Documentation](https://docs.rs/simd) +diff --git a/third_party/rust/simd/benches/mandelbrot.rs b/third_party/rust/simd/benches/mandelbrot.rs +deleted file mode 100755 +index 61061a4a301f..000000000000 +--- a/third_party/rust/simd/benches/mandelbrot.rs ++++ /dev/null +@@ -1,117 +0,0 @@ +-#![feature(test)] +-#![feature(cfg_target_feature)] +- +-extern crate simd; +-extern crate test; +- +-use test::black_box as bb; +-use test::Bencher as B; +-use simd::{f32x4, u32x4}; +-#[cfg(any(target_feature = "avx", target_feature = "avx2"))] +-use simd::x86::avx::{f32x8, u32x8}; +- +-fn naive(c_x: f32, c_y: f32, max_iter: u32) -> u32 { +- let mut x = c_x; +- let mut y = c_y; +- let mut count = 0; +- while count < max_iter { +- let xy = x * y; +- let xx = x * x; +- let yy = y * y; +- let sum = xx + yy; +- if sum > 4.0 { +- break +- } +- count += 1; +- x = xx - yy + c_x; +- y = xy * 2.0 + c_y; +- } +- count +-} +- +-fn simd4(c_x: f32x4, c_y: f32x4, max_iter: u32) -> u32x4 { +- let mut x = c_x; +- let mut y = c_y; +- +- let mut count = u32x4::splat(0); +- for _ in 0..max_iter as usize { +- let xy = x * y; +- let xx = x * x; +- let yy = y * y; +- let sum = xx + yy; +- let mask = sum.lt(f32x4::splat(4.0)); +- +- if !mask.any() { break } +- count = count + mask.to_i().select(u32x4::splat(1), u32x4::splat(0)); +- +- x = xx - yy + c_x; +- y = xy + xy + c_y; +- } +- count +-} +- +-#[cfg(target_feature = "avx")] +-fn simd8(c_x: f32x8, c_y: f32x8, max_iter: u32) -> u32x8 { +- let mut x = c_x; +- let mut y = c_y; +- +- let mut count = u32x8::splat(0); +- for _ in 0..max_iter as usize { +- let xy = x * y; +- let xx = x * x; +- let yy = y * y; +- let sum = xx + yy; +- let mask = sum.lt(f32x8::splat(4.0)); +- +- if !mask.any() { break } +- count = count + mask.to_i().select(u32x8::splat(1), u32x8::splat(0)); +- +- x = xx - yy + c_x; +- y = xy + xy + c_y; +- } +- count +-} +- +-const SCALE: f32 = 3.0 / 100.0; +-const N: u32 = 100; +-#[bench] +-fn mandel_naive(b: &mut B) { +- b.iter(|| { 
+- for j in 0..100 { +- let y = -1.5 + (j as f32) * SCALE; +- for i in 0..100 { +- let x = -2.2 + (i as f32) * SCALE; +- bb(naive(x, y, N)); +- } +- } +- }) +-} +-#[bench] +-fn mandel_simd4(b: &mut B) { +- let tweak = u32x4::new(0, 1, 2, 3); +- b.iter(|| { +- for j in 0..100 { +- let y = f32x4::splat(-1.5) + f32x4::splat(SCALE) * u32x4::splat(j).to_f32(); +- for i in 0..25 { +- let i = u32x4::splat(i * 4) + tweak; +- let x = f32x4::splat(-2.2) + f32x4::splat(SCALE) * i.to_f32(); +- bb(simd4(x, y, N)); +- } +- } +- }) +-} +-#[cfg(any(target_feature = "avx", target_feature = "avx2"))] +-#[bench] +-fn mandel_simd8(b: &mut B) { +- let tweak = u32x8::new(0, 1, 2, 3, 4, 5, 6, 7); +- b.iter(|| { +- for j in 0..100 { +- let y = f32x8::splat(-1.5) + f32x8::splat(SCALE) * u32x8::splat(j).to_f32(); +- for i in 0..13 { // 100 not divisible by 8 :( +- let i = u32x8::splat(i * 8) + tweak; +- let x = f32x8::splat(-2.2) + f32x8::splat(SCALE) * i.to_f32(); +- bb(simd8(x, y, N)); +- } +- } +- }) +-} +diff --git a/third_party/rust/simd/benches/matrix.rs b/third_party/rust/simd/benches/matrix.rs +deleted file mode 100755 +index 36aa88237492..000000000000 +--- a/third_party/rust/simd/benches/matrix.rs ++++ /dev/null +@@ -1,485 +0,0 @@ +-#![feature(test)] +-#![feature(cfg_target_feature)] +-extern crate test; +-extern crate simd; +- +-use test::black_box as bb; +-use test::Bencher as B; +-use simd::f32x4; +-#[cfg(target_feature = "avx")] +-use simd::x86::avx::{f32x8, f64x4}; +-// #[cfg(target_feature = "avx2")] +-// use simd::x86::avx2::Avx2F32x8; +- +- +-#[bench] +-fn multiply_naive(b: &mut B) { +- let x = [[1.0_f32; 4]; 4]; +- let y = [[2.0; 4]; 4]; +- b.iter(|| { +- for _ in 0..100 { +- let (x, y) = bb((&x, &y)); +- +- bb(&[[x[0][0] * y[0][0] + x[1][0] * y[0][1] + x[2][0] * y[0][2] + x[3][0] * y[0][3], +- x[0][1] * y[0][0] + x[1][1] * y[0][1] + x[2][1] * y[0][2] + x[3][1] * y[0][3], +- x[0][2] * y[0][0] + x[1][2] * y[0][1] + x[2][2] * y[0][2] + x[3][2] * y[0][3], +- x[0][3] * y[0][0] 
+ x[1][3] * y[0][1] + x[2][3] * y[0][2] + x[3][3] * y[0][3]], +- [x[0][0] * y[1][0] + x[1][0] * y[1][1] + x[2][0] * y[1][2] + x[3][0] * y[1][3], +- x[0][1] * y[1][0] + x[1][1] * y[1][1] + x[2][1] * y[1][2] + x[3][1] * y[1][3], +- x[0][2] * y[1][0] + x[1][2] * y[1][1] + x[2][2] * y[1][2] + x[3][2] * y[1][3], +- x[0][3] * y[1][0] + x[1][3] * y[1][1] + x[2][3] * y[1][2] + x[3][3] * y[1][3]], +- [x[0][0] * y[2][0] + x[1][0] * y[2][1] + x[2][0] * y[2][2] + x[3][0] * y[2][3], +- x[0][1] * y[2][0] + x[1][1] * y[2][1] + x[2][1] * y[2][2] + x[3][1] * y[2][3], +- x[0][2] * y[2][0] + x[1][2] * y[2][1] + x[2][2] * y[2][2] + x[3][2] * y[2][3], +- x[0][3] * y[2][0] + x[1][3] * y[2][1] + x[2][3] * y[2][2] + x[3][3] * y[2][3]], +- [x[0][0] * y[3][0] + x[1][0] * y[3][1] + x[2][0] * y[3][2] + x[3][0] * y[3][3], +- x[0][1] * y[3][0] + x[1][1] * y[3][1] + x[2][1] * y[3][2] + x[3][1] * y[3][3], +- x[0][2] * y[3][0] + x[1][2] * y[3][1] + x[2][2] * y[3][2] + x[3][2] * y[3][3], +- x[0][3] * y[3][0] + x[1][3] * y[3][1] + x[2][3] * y[3][2] + x[3][3] * y[3][3]], +- ]); +- } +- }) +-} +- +-#[bench] +-fn multiply_simd4_32(b: &mut B) { +- let x = [f32x4::splat(1.0_f32); 4]; +- let y = [f32x4::splat(2.0); 4]; +- b.iter(|| { +- for _ in 0..100 { +- let (x, y) = bb((&x, &y)); +- +- let y0 = y[0]; +- let y1 = y[1]; +- let y2 = y[2]; +- let y3 = y[3]; +- bb(&[f32x4::splat(y0.extract(0)) * x[0] + +- f32x4::splat(y0.extract(1)) * x[1] + +- f32x4::splat(y0.extract(2)) * x[2] + +- f32x4::splat(y0.extract(3)) * x[3], +- f32x4::splat(y1.extract(0)) * x[0] + +- f32x4::splat(y1.extract(1)) * x[1] + +- f32x4::splat(y1.extract(2)) * x[2] + +- f32x4::splat(y1.extract(3)) * x[3], +- f32x4::splat(y2.extract(0)) * x[0] + +- f32x4::splat(y2.extract(1)) * x[1] + +- f32x4::splat(y2.extract(2)) * x[2] + +- f32x4::splat(y2.extract(3)) * x[3], +- f32x4::splat(y3.extract(0)) * x[0] + +- f32x4::splat(y3.extract(1)) * x[1] + +- f32x4::splat(y3.extract(2)) * x[2] + +- f32x4::splat(y3.extract(3)) * x[3], +- ]); +- } +- }) 
+-} +- +-#[cfg(target_feature = "avx")] +-#[bench] +-fn multiply_simd4_64(b: &mut B) { +- let x = [f64x4::splat(1.0_f64); 4]; +- let y = [f64x4::splat(2.0); 4]; +- b.iter(|| { +- for _ in 0..100 { +- let (x, y) = bb((&x, &y)); +- +- let y0 = y[0]; +- let y1 = y[1]; +- let y2 = y[2]; +- let y3 = y[3]; +- bb(&[f64x4::splat(y0.extract(0)) * x[0] + +- f64x4::splat(y0.extract(1)) * x[1] + +- f64x4::splat(y0.extract(2)) * x[2] + +- f64x4::splat(y0.extract(3)) * x[3], +- f64x4::splat(y1.extract(0)) * x[0] + +- f64x4::splat(y1.extract(1)) * x[1] + +- f64x4::splat(y1.extract(2)) * x[2] + +- f64x4::splat(y1.extract(3)) * x[3], +- f64x4::splat(y2.extract(0)) * x[0] + +- f64x4::splat(y2.extract(1)) * x[1] + +- f64x4::splat(y2.extract(2)) * x[2] + +- f64x4::splat(y2.extract(3)) * x[3], +- f64x4::splat(y3.extract(0)) * x[0] + +- f64x4::splat(y3.extract(1)) * x[1] + +- f64x4::splat(y3.extract(2)) * x[2] + +- f64x4::splat(y3.extract(3)) * x[3], +- ]); +- } +- }) +-} +- +-#[bench] +-fn inverse_naive(b: &mut B) { +- let mut x = [[0_f32; 4]; 4]; +- for i in 0..4 { x[i][i] = 1.0 } +- +- b.iter(|| { +- for _ in 0..100 { +- let x = bb(&x); +- +- let mut t = [[0_f32; 4]; 4]; +- for i in 0..4 { +- t[0][i] = x[i][0]; +- t[1][i] = x[i][1]; +- t[2][i] = x[i][2]; +- t[3][i] = x[i][3]; +- } +- +- let _0 = t[2][2] * t[3][3]; +- let _1 = t[2][3] * t[3][2]; +- let _2 = t[2][1] * t[3][3]; +- let _3 = t[2][3] * t[3][1]; +- let _4 = t[2][1] * t[3][2]; +- let _5 = t[2][2] * t[3][1]; +- let _6 = t[2][0] * t[3][3]; +- let _7 = t[2][3] * t[3][0]; +- let _8 = t[2][0] * t[3][2]; +- let _9 = t[2][2] * t[3][0]; +- let _10 = t[2][0] * t[3][1]; +- let _11 = t[2][1] * t[3][0]; +- +- let d00 = _0 * t[1][1] + _3 * t[1][2] + _4 * t[1][3] - +- (_1 * t[1][1] + _2 * t[1][2] + _5 * t[1][3]); +- let d01 = _1 * t[1][0] + _6 * t[1][2] + _9 * t[1][3] - +- (_0 * t[1][0] + _7 * t[1][2] + _8 * t[1][3]); +- let d02 = _2 * t[1][0] + _7 * t[1][1] + _10 * t[1][3] - +- (_3 * t[1][0] + _6 * t[1][1] + _11 * t[1][3]); +- let d03 = 
_5 * t[1][0] + _8 * t[1][1] + _11 * t[1][2] - +- (_4 * t[1][0] + _9 * t[1][1] + _10 * t[1][2]); +- let d10 = _1 * t[0][1] + _2 * t[0][2] + _5 * t[0][3] - +- (_0 * t[0][1] + _3 * t[0][2] + _4 * t[0][3]); +- let d11 = _0 * t[0][0] + _7 * t[0][2] + _8 * t[0][3] - +- (_1 * t[0][0] + _6 * t[0][2] + _9 * t[0][3]); +- let d12 = _3 * t[0][0] + _6 * t[0][1] + _11 * t[0][3] - +- (_2 * t[0][0] + _7 * t[0][1] + _10 * t[0][3]); +- let d13 = _4 * t[0][0] + _9 * t[0][1] + _10 * t[0][2] - +- (_5 * t[0][0] + _8 * t[0][1] + _11 * t[0][2]); +- +- let _0 = t[0][2] * t[1][3]; +- let _1 = t[0][3] * t[1][2]; +- let _2 = t[0][1] * t[1][3]; +- let _3 = t[0][3] * t[1][1]; +- let _4 = t[0][1] * t[1][2]; +- let _5 = t[0][2] * t[1][1]; +- let _6 = t[0][0] * t[1][3]; +- let _7 = t[0][3] * t[1][0]; +- let _8 = t[0][0] * t[1][2]; +- let _9 = t[0][2] * t[1][0]; +- let _10 = t[0][0] * t[1][1]; +- let _11 = t[0][1] * t[1][0]; +- +- let d20 = _0*t[3][1] + _3*t[3][2] + _4*t[3][3]- +- (_1*t[3][1] + _2*t[3][2] + _5*t[3][3]); +- let d21 = _1*t[3][0] + _6*t[3][2] + _9*t[3][3]- +- (_0*t[3][0] + _7*t[3][2] + _8*t[3][3]); +- let d22 = _2*t[3][0] + _7*t[3][1] + _10*t[3][3]- +- (_3*t[3][0] + _6*t[3][1] + _11*t[3][3]); +- let d23 = _5*t[3][0] + _8*t[3][1] + _11*t[3][2]- +- (_4*t[3][0] + _9*t[3][1] + _10*t[3][2]); +- let d30 = _2*t[2][2] + _5*t[2][3] + _1*t[2][1]- +- (_4*t[2][3] + _0*t[2][1] + _3*t[2][2]); +- let d31 = _8*t[2][3] + _0*t[2][0] + _7*t[2][2]- +- (_6*t[2][2] + _9*t[2][3] + _1*t[2][0]); +- let d32 = _6*t[2][1] + _11*t[2][3] + _3*t[2][0]- +- (_10*t[2][3] + _2*t[2][0] + _7*t[2][1]); +- let d33 = _10*t[2][2] + _4*t[2][0] + _9*t[2][1]- +- (_8*t[2][1] + _11*t[2][2] + _5*t[2][0]); +- +- let det = t[0][0] * d00 + t[0][1] * d01 + t[0][2] * d02 + t[0][3] * d03; +- +- let det = 1.0 / det; +- let mut ret = [[d00, d01, d02, d03], +- [d10, d11, d12, d13], +- [d20, d21, d22, d23], +- [d30, d31, d32, d33]]; +- for i in 0..4 { +- for j in 0..4 { +- ret[i][j] *= det; +- } +- } +- bb(&ret); +- } +- }) +-} +- 
+-#[bench] +-fn inverse_simd4(b: &mut B) { +- let mut x = [f32x4::splat(0_f32); 4]; +- for i in 0..4 { x[i] = x[i].replace(i as u32, 1.0); } +- +- fn shuf0145(v: f32x4, w: f32x4) -> f32x4 { +- f32x4::new(v.extract(0), v.extract(1), +- w.extract(4 - 4), w.extract(5 - 4)) +- } +- fn shuf0246(v: f32x4, w: f32x4) -> f32x4 { +- f32x4::new(v.extract(0), v.extract(2), +- w.extract(4 - 4), w.extract(6 - 4)) +- } +- fn shuf1357(v: f32x4, w: f32x4) -> f32x4 { +- f32x4::new(v.extract(1), v.extract(3), +- w.extract(5 - 4), w.extract(7 - 4)) +- } +- fn shuf2367(v: f32x4, w: f32x4) -> f32x4 { +- f32x4::new(v.extract(2), v.extract(3), +- w.extract(6 - 4), w.extract(7 - 4)) +- } +- +- fn swiz1032(v: f32x4) -> f32x4 { +- f32x4::new(v.extract(1), v.extract(0), +- v.extract(3), v.extract(2)) +- } +- fn swiz2301(v: f32x4) -> f32x4 { +- f32x4::new(v.extract(2), v.extract(3), +- v.extract(0), v.extract(1)) +- } +- +- b.iter(|| { +- for _ in 0..100 { +- let src0; +- let src1; +- let src2; +- let src3; +- let mut tmp1; +- let row0; +- let mut row1; +- let mut row2; +- let mut row3; +- let mut minor0; +- let mut minor1; +- let mut minor2; +- let mut minor3; +- let mut det; +- +- let x = bb(&x); +- src0 = x[0]; +- src1 = x[1]; +- src2 = x[2]; +- src3 = x[3]; +- +- tmp1 = shuf0145(src0, src1); +- row1 = shuf0145(src2, src3); +- row0 = shuf0246(tmp1, row1); +- row1 = shuf1357(row1, tmp1); +- +- tmp1 = shuf2367(src0, src1); +- row3 = shuf2367(src2, src3); +- row2 = shuf0246(tmp1, row3); +- row3 = shuf0246(row3, tmp1); +- +- +- tmp1 = row2 * row3; +- tmp1 = swiz1032(tmp1); +- minor0 = row1 * tmp1; +- minor1 = row0 * tmp1; +- tmp1 = swiz2301(tmp1); +- minor0 = (row1 * tmp1) - minor0; +- minor1 = (row0 * tmp1) - minor1; +- minor1 = swiz2301(minor1); +- +- +- tmp1 = row1 * row2; +- tmp1 = swiz1032(tmp1); +- minor0 = (row3 * tmp1) + minor0; +- minor3 = row0 * tmp1; +- tmp1 = swiz2301(tmp1); +- +- minor0 = minor0 - row3 * tmp1; +- minor3 = row0 * tmp1 - minor3; +- minor3 = swiz2301(minor3); +- +- +- 
tmp1 = row3 * swiz2301(row1); +- tmp1 = swiz1032(tmp1); +- row2 = swiz2301(row2); +- minor0 = row2 * tmp1 + minor0; +- minor2 = row0 * tmp1; +- tmp1 = swiz2301(tmp1); +- minor0 = minor0 - row2 * tmp1; +- minor2 = row0 * tmp1 - minor2; +- minor2 = swiz2301(minor2); +- +- +- tmp1 = row0 * row1; +- tmp1 = swiz1032(tmp1); +- minor2 = minor2 + row3 * tmp1; +- minor3 = row2 * tmp1 - minor3; +- tmp1 = swiz2301(tmp1); +- minor2 = row3 * tmp1 - minor2; +- minor3 = minor3 - row2 * tmp1; +- +- +- +- tmp1 = row0 * row3; +- tmp1 = swiz1032(tmp1); +- minor1 = minor1 - row2 * tmp1; +- minor2 = row1 * tmp1 + minor2; +- tmp1 = swiz2301(tmp1); +- minor1 = row2 * tmp1 + minor1; +- minor2 = minor2 - row1 * tmp1; +- +- tmp1 = row0 * row2; +- tmp1 = swiz1032(tmp1); +- minor1 = row3 * tmp1 + minor1; +- minor3 = minor3 - row1 * tmp1; +- tmp1 = swiz2301(tmp1); +- minor1 = minor1 - row3 * tmp1; +- minor3 = row1 * tmp1 + minor3; +- +- det = row0 * minor0; +- det = swiz2301(det) + det; +- det = swiz1032(det) + det; +- //tmp1 = det.approx_reciprocal(); det = tmp1 * (f32x4::splat(2.0) - det * tmp1); +- det = f32x4::splat(1.0) / det; +- +- bb(&[minor0 * det, minor1 * det, minor2 * det, minor3 * det]); +- } +- }) +- +-} +- +-#[bench] +-fn transpose_naive(b: &mut B) { +- let x = [[0_f32; 4]; 4]; +- b.iter(|| { +- for _ in 0..100 { +- let x = bb(&x); +- bb(&[[x[0][0], x[1][0], x[2][0], x[3][0]], +- [x[0][1], x[1][1], x[2][1], x[3][1]], +- [x[0][2], x[1][2], x[2][2], x[3][2]], +- [x[0][3], x[1][3], x[2][3], x[3][3]]]); +- } +- }) +-} +- +-#[bench] +-fn transpose_simd4(b: &mut B) { +- let x = [f32x4::splat(0_f32); 4]; +- +- fn shuf0246(v: f32x4, w: f32x4) -> f32x4 { +- f32x4::new(v.extract(0), v.extract(2), +- w.extract(4 - 4), w.extract(6 - 4)) +- } +- fn shuf1357(v: f32x4, w: f32x4) -> f32x4 { +- f32x4::new(v.extract(1), v.extract(3), +- w.extract(5 - 4), w.extract(7 - 4)) +- } +- b.iter(|| { +- for _ in 0..100 { +- let x = bb(&x); +- let x0 = x[0]; +- let x1 = x[1]; +- let x2 = x[2]; +- let x3 = 
x[3]; +- +- let a0 = shuf0246(x0, x1); +- let a1 = shuf0246(x2, x3); +- let a2 = shuf1357(x0, x1); +- let a3 = shuf1357(x2, x3); +- +- let b0 = shuf0246(a0, a1); +- let b1 = shuf0246(a2, a3); +- let b2 = shuf1357(a0, a1); +- let b3 = shuf1357(a2, a3); +- bb(&[b0, b1, b2, b3]); +- } +- }) +-} +- +-#[cfg(target_feature = "avx")] +-#[bench] +-fn transpose_simd8_naive(b: &mut B) { +- let x = [f32x8::splat(0_f32); 2]; +- +- fn shuf0246(v: f32x8, w: f32x8) -> f32x8 { +- f32x8::new(v.extract(0), v.extract(2), v.extract(4), v.extract(6), +- w.extract(0), w.extract(2), w.extract(4), w.extract(6)) +- } +- fn shuf1357(v: f32x8, w: f32x8) -> f32x8 { +- f32x8::new(v.extract(1), v.extract(3), v.extract(5), v.extract(7), +- w.extract(1), w.extract(3), w.extract(5), w.extract(7),) +- } +- b.iter(|| { +- for _ in 0..100 { +- let x = bb(&x); +- let x01 = x[0]; +- let x23 = x[1]; +- +- let a01 = shuf0246(x01, x23); +- let a23 = shuf1357(x01, x23); +- +- let b01 = shuf0246(a01, a23); +- let b23 = shuf1357(a01, a23); +- bb(&[b01, b23]); +- } +- }) +-} +- +-#[cfg(target_feature = "avx")] +-#[bench] +-fn transpose_simd8_avx2_vpermps(b: &mut B) { +- let x = [f32x8::splat(0_f32); 2]; +- +- // efficient on AVX2 using vpermps +- fn perm04152637(v: f32x8) -> f32x8 { +- // broken on rustc 1.7.0-nightly (1ddaf8bdf 2015-12-12) +- // v.permutevar(i32x8::new(0, 4, 1, 5, 2, 6, 3, 7)) +- f32x8::new(v.extract(0), v.extract(4), v.extract(1), v.extract(5), +- v.extract(2), v.extract(6), v.extract(3), v.extract(7)) +- } +- fn shuf_lo(v: f32x8, w: f32x8) -> f32x8 { +- f32x8::new(v.extract(0), v.extract(1), w.extract(0), w.extract(1), +- v.extract(4), v.extract(5), w.extract(4), w.extract(5),) +- } +- fn shuf_hi(v: f32x8, w: f32x8) -> f32x8 { +- f32x8::new(v.extract(2), v.extract(3), w.extract(2), w.extract(3), +- v.extract(6), v.extract(7), w.extract(6), w.extract(7),) +- } +- b.iter(|| { +- for _ in 0..100 { +- let x = bb(&x); +- let x01 = x[0]; +- let x23 = x[1]; +- +- let a01 = perm04152637(x01); +- 
let a23 = perm04152637(x23); +- +- let b01 = shuf_lo(a01, a23); +- let b23 = shuf_hi(a01, a23); +- bb(&[b01, b23]); +- } +- }) +-} +- +-#[cfg(target_feature = "avx")] +-#[bench] +-fn transpose_simd8_avx2_vpermpd(b: &mut B) { +- let x = [f32x8::splat(0_f32); 2]; +- +- // efficient on AVX2 using vpermpd +- fn perm01452367(v: f32x8) -> f32x8 { +- f32x8::new(v.extract(0), v.extract(1), v.extract(4), v.extract(5), +- v.extract(2), v.extract(3), v.extract(6), v.extract(7)) +- } +- fn shuf_lo_ps(v: f32x8, w: f32x8) -> f32x8 { +- f32x8::new(v.extract(0), w.extract(0), v.extract(1), w.extract(1), +- v.extract(4), w.extract(4), v.extract(5), w.extract(5),) +- } +- fn shuf_hi_ps(v: f32x8, w: f32x8) -> f32x8 { +- f32x8::new(v.extract(2), w.extract(2), v.extract(3), w.extract(3), +- v.extract(6), w.extract(6), v.extract(7), w.extract(7),) +- } +- b.iter(|| { +- for _ in 0..100 { +- let x = bb(&x); +- let x01 = x[0]; +- let x23 = x[1]; +- +- let a01 = perm01452367(x01); +- let a23 = perm01452367(x23); +- +- let b01 = shuf_lo_ps(a01, a23); +- let b23 = shuf_hi_ps(a01, a23); +- bb(&[b01, b23]); +- } +- }) +-} +diff --git a/third_party/rust/simd/build.rs b/third_party/rust/simd/build.rs +deleted file mode 100644 +index 61b5330a1846..000000000000 +--- a/third_party/rust/simd/build.rs ++++ /dev/null +@@ -1,3 +0,0 @@ +-fn main() { +- println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1"); +-} +diff --git a/third_party/rust/simd/examples/axpy.rs b/third_party/rust/simd/examples/axpy.rs +deleted file mode 100755 +index 7862721b254d..000000000000 +--- a/third_party/rust/simd/examples/axpy.rs ++++ /dev/null +@@ -1,65 +0,0 @@ +-#![feature(cfg_target_feature)] +-extern crate simd; +-use simd::f32x4; +-#[cfg(target_feature = "avx")] +-use simd::x86::avx::f32x8; +- +-#[inline(never)] +-pub fn axpy(z: &mut [f32], a: f32, x: &[f32], y: &[f32]) { +- assert_eq!(x.len(), y.len()); +- assert_eq!(x.len(), z.len()); +- +- let len = std::cmp::min(std::cmp::min(x.len(), y.len()), z.len()); +- +- let mut i = 0; 
+- while i < len & !3 { +- let x = f32x4::load(x, i); +- let y = f32x4::load(y, i); +- (f32x4::splat(a) * x + y).store(z, i); +- i += 4 +- } +-} +- +-#[cfg(target_feature = "avx")] +-#[inline(never)] +-pub fn axpy8(z: &mut [f32], a: f32, x: &[f32], y: &[f32]) { +- assert_eq!(x.len(), y.len()); +- assert_eq!(x.len(), z.len()); +- +- let len = std::cmp::min(std::cmp::min(x.len(), y.len()), z.len()); +- +- let mut i = 0; +- while i < len & !7 { +- let x = f32x8::load(x, i); +- let y = f32x8::load(y, i); +- (f32x8::splat(a) * x + y).store(z, i); +- i += 8 +- } +-} +- +- +-#[cfg(not(target_feature = "avx"))] +-pub fn axpy8(_: &mut [f32], _: f32, _: &[f32], _: &[f32]) { +- unimplemented!() +-} +- +- +-fn main() { +- let mut z = vec![0.; 4]; +- axpy(&mut z, 2., &[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0]); +- println!("{:?}", z); +- let mut z = vec![0.; 8]; +- axpy(&mut z, 3., &[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], +- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0]); +- println!("{:?}", z); +- +- if cfg!(target_feature = "avx") { +- let mut z = vec![0.; 4]; +- axpy8(&mut z, 2., &[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0]); +- println!("{:?}", z); +- let mut z = vec![0.; 8]; +- axpy8(&mut z, 3., &[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], +- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0]); +- println!("{:?}", z); +- } +-} +diff --git a/third_party/rust/simd/examples/convert.rs b/third_party/rust/simd/examples/convert.rs +deleted file mode 100644 +index 11823a4b50d2..000000000000 +--- a/third_party/rust/simd/examples/convert.rs ++++ /dev/null +@@ -1,38 +0,0 @@ +-extern crate simd; +-use simd::f32x4; +- +-#[inline(never)] +-pub fn convert_scalar(x: &mut [i32], y: &[f32]) { +- assert_eq!(x.len(), y.len()); +- +- let mut i = 0; +- while i < x.len() & !3 { +- x[i] = y[i] as i32; +- i += 1; +- } +-} +- +-#[inline(never)] +-pub fn convert(x: &mut [i32], y: &[f32]) { +- assert_eq!(x.len(), y.len()); +- +- let mut i = 0; +- while i < x.len() & !3 { +- let v = f32x4::load(y, i); +- 
v.to_i32().store(x, i); +- i += 4 +- } +-} +- +-fn main() { +- let x = &mut [0; 12]; +- let y = [1.0; 12]; +- convert(x, &y); +- convert_scalar(x, &y); +- println!("{:?}", x); +- let x = &mut [0; 16]; +- let y = [1.0; 16]; +- convert(x, &y); +- convert_scalar(x, &y); +- println!("{:?}", x); +-} +diff --git a/third_party/rust/simd/examples/dot-product.rs b/third_party/rust/simd/examples/dot-product.rs +deleted file mode 100755 +index 9f0e1d35c799..000000000000 +--- a/third_party/rust/simd/examples/dot-product.rs ++++ /dev/null +@@ -1,60 +0,0 @@ +-#![feature(cfg_target_feature)] +-extern crate simd; +-use simd::f32x4; +-#[cfg(target_feature = "avx")] +-use simd::x86::avx::{f32x8, LowHigh128}; +- +-#[inline(never)] +-pub fn dot(x: &[f32], y: &[f32]) -> f32 { +- assert_eq!(x.len(), y.len()); +- +- let len = std::cmp::min(x.len(), y.len()); +- +- let mut sum = f32x4::splat(0.0); +- let mut i = 0; +- while i < len & !3 { +- let x = f32x4::load(x, i); +- let y = f32x4::load(y, i); +- sum = sum + x * y; +- i += 4 +- } +- sum.extract(0) + sum.extract(1) + sum.extract(2) + sum.extract(3) +-} +- +-#[cfg(target_feature = "avx")] +-#[inline(never)] +-pub fn dot8(x: &[f32], y: &[f32]) -> f32 { +- assert_eq!(x.len(), y.len()); +- +- let len = std::cmp::min(x.len(), y.len()); +- +- let mut sum = f32x8::splat(0.0); +- let mut i = 0; +- while i < len & !7 { +- let x = f32x8::load(x, i); +- let y = f32x8::load(y, i); +- sum = sum + x * y; +- i += 8 +- } +- let sum = sum.low() + sum.high(); +- sum.extract(0) + sum.extract(1) + sum.extract(2) + sum.extract(3) +-} +- +- +-#[cfg(not(target_feature = "avx"))] +-pub fn dot8(_: &[f32], _: &[f32]) -> f32 { +- unimplemented!() +-} +- +- +-fn main() { +- println!("{}", dot(&[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0])); +- println!("{}", dot(&[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], +- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0])); +- +- if cfg!(target_feature = "avx") { +- println!("{}", dot8(&[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0])); +- 
println!("{}", dot8(&[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], +- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0])); +- } +-} +diff --git a/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs b/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs +deleted file mode 100644 +index fa30b2283f93..000000000000 +--- a/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs ++++ /dev/null +@@ -1,156 +0,0 @@ +-// The Computer Language Benchmarks Game +-// http://benchmarksgame.alioth.debian.org/ +-// +-// contributed by the Rust Project Developers +-// contributed by TeXitoi +- +-use std::{cmp, mem}; +-use std::thread; +- +-fn rotate(x: &mut [i32]) { +- let mut prev = x[0]; +- for place in x.iter_mut().rev() { +- prev = mem::replace(place, prev) +- } +-} +- +-fn next_permutation(perm: &mut [i32], count: &mut [i32]) { +- for i in 1..perm.len() { +- rotate(&mut perm[.. i + 1]); +- let count_i = &mut count[i]; +- if *count_i >= i as i32 { +- *count_i = 0; +- } else { +- *count_i += 1; +- break +- } +- } +-} +- +-#[derive(Clone, Copy)] +-struct P { +- p: [i32; 16], +-} +- +-#[derive(Clone, Copy)] +-struct Perm { +- cnt: [i32; 16], +- fact: [u32; 16], +- n: u32, +- permcount: u32, +- perm: P, +-} +- +-impl Perm { +- fn new(n: u32) -> Perm { +- let mut fact = [1; 16]; +- for i in 1 .. n as usize + 1 { +- fact[i] = fact[i - 1] * i as u32; +- } +- Perm { +- cnt: [0; 16], +- fact: fact, +- n: n, +- permcount: 0, +- perm: P { p: [0; 16 ] } +- } +- } +- +- fn get(&mut self, mut idx: i32) -> P { +- let mut pp = [0u8; 16]; +- self.permcount = idx as u32; +- for (i, place) in self.perm.p.iter_mut().enumerate() { +- *place = i as i32 + 1; +- } +- +- for i in (1 .. self.n as usize).rev() { +- let d = idx / self.fact[i] as i32; +- self.cnt[i] = d; +- idx %= self.fact[i] as i32; +- for (place, val) in pp.iter_mut().zip(self.perm.p[..(i+1)].iter()) { +- *place = (*val) as u8 +- } +- +- let d = d as usize; +- for j in 0 .. 
i + 1 { +- self.perm.p[j] = if j + d <= i {pp[j + d]} else {pp[j+d-i-1]} as i32; +- } +- } +- +- self.perm +- } +- +- fn count(&self) -> u32 { self.permcount } +- fn max(&self) -> u32 { self.fact[self.n as usize] } +- +- fn next(&mut self) -> P { +- next_permutation(&mut self.perm.p, &mut self.cnt); +- self.permcount += 1; +- +- self.perm +- } +-} +- +- +-fn reverse(tperm: &mut [i32], k: usize) { +- tperm[..k].reverse() +-} +- +-fn work(mut perm: Perm, n: usize, max: usize) -> (i32, i32) { +- let mut checksum = 0; +- let mut maxflips = 0; +- +- let mut p = perm.get(n as i32); +- +- while perm.count() < max as u32 { +- let mut flips = 0; +- +- while p.p[0] != 1 { +- let k = p.p[0] as usize; +- reverse(&mut p.p, k); +- flips += 1; +- } +- +- checksum += if perm.count() % 2 == 0 {flips} else {-flips}; +- maxflips = cmp::max(maxflips, flips); +- +- p = perm.next(); +- } +- +- (checksum, maxflips) +-} +- +-fn fannkuch(n: i32) -> (i32, i32) { +- let perm = Perm::new(n as u32); +- +- let n = 1; +- let mut futures = vec![]; +- let k = perm.max() / n; +- +- for j in (0..).map(|x| x * k).take_while(|&j| j < k * n) { +- let max = cmp::min(j+k, perm.max()); +- +- futures.push(thread::spawn(move|| { +- work(perm, j as usize, max as usize) +- })) +- } +- +- let mut checksum = 0; +- let mut maxflips = 0; +- for fut in futures.into_iter() { +- let (cs, mf) = fut.join().unwrap(); +- checksum += cs; +- maxflips = cmp::max(maxflips, mf); +- } +- (checksum, maxflips) +-} +- +-fn main() { +- let n = std::env::args_os().nth(1) +- .and_then(|s| s.into_string().ok()) +- .and_then(|n| n.parse().ok()) +- .unwrap_or(7); +- +- let (checksum, maxflips) = fannkuch(n); +- println!("{}\nPfannkuchen({}) = {}", checksum, n, maxflips); +-} +diff --git a/third_party/rust/simd/examples/fannkuch-redux.rs b/third_party/rust/simd/examples/fannkuch-redux.rs +deleted file mode 100755 +index 2e52ae721135..000000000000 +--- a/third_party/rust/simd/examples/fannkuch-redux.rs ++++ /dev/null +@@ -1,233 +0,0 @@ 
+-#![feature(cfg_target_feature)] +-extern crate simd; +-#[macro_use] extern crate cfg_if; +-use simd::u8x16; +- +-use std::{env, process}; +- +-cfg_if! { +- if #[cfg(target_arch = "aarch64")] { +- #[inline(always)] +- fn shuffle(x: u8x16, y: u8x16) -> u8x16 { +- use simd::aarch64::neon::*; +- y.table_lookup_1(x) +- } +- } else if #[cfg(all(target_arch = "arm", +- target_feature = "neon"))] { +- #[inline(always)] +- fn shuffle(x: u8x16, y: u8x16) -> u8x16 { +- use simd::arm::neon::*; +- #[inline(always)] +- fn split(x: u8x16) -> (u8x8, u8x8) { +- unsafe {std::mem::transmute(x)} +- } +- fn join(x: u8x8, y: u8x8) -> u8x16 { +- unsafe {std::mem::transmute((x, y))} +- } +- +- let (t0, t1) = split(x); +- let (i0, i1) = split(y); +- join(i0.table_lookup_2(t0, t1), +- i1.table_lookup_2(t0, t1)) +- } +- } else if #[cfg(target_feature = "ssse3")] { +- #[inline(always)] +- fn shuffle(x: u8x16, y: u8x16) -> u8x16 { +- use simd::x86::ssse3::*; +- x.shuffle_bytes(y) +- } +- } else { +- // slow fallback, so tests work +- #[inline(always)] +- fn shuffle(x: u8x16, y: u8x16) -> u8x16 { +- u8x16::new(x.extract(y.extract(0) as u32), +- x.extract(y.extract(1) as u32), +- x.extract(y.extract(2) as u32), +- x.extract(y.extract(3) as u32), +- x.extract(y.extract(4) as u32), +- x.extract(y.extract(5) as u32), +- x.extract(y.extract(6) as u32), +- x.extract(y.extract(7) as u32), +- x.extract(y.extract(8) as u32), +- x.extract(y.extract(9) as u32), +- x.extract(y.extract(10) as u32), +- x.extract(y.extract(11) as u32), +- x.extract(y.extract(12) as u32), +- x.extract(y.extract(13) as u32), +- x.extract(y.extract(14) as u32), +- x.extract(y.extract(15) as u32)) +- } +- } +-} +-struct State { +- s: [u8; 16], +- flip_masks: [u8x16; 16], +- rotate_masks: [u8x16; 16], +- +- maxflips: i32, +- odd: u16, +- checksum: i32, +-} +-impl State { +- fn new() -> State { +- State { +- s: [0; 16], +- flip_masks: [u8x16::splat(0); 16], +- rotate_masks: [u8x16::splat(0); 16], +- +- maxflips: 0, +- odd: 0, +- 
checksum: 0, +- } +- } +- #[inline(never)] +- fn rotate_sisd(&mut self, n: usize) { +- let c = self.s[0]; +- for i in 1..(n + 1) { +- self.s[i - 1] = self.s[i]; +- } +- self.s[n] = c; +- } +- #[inline(never)] +- fn popmasks(&mut self) { +- let mut mask = [0_u8; 16]; +- for i in 0..16 { +- for j in 0..16 { mask[j] = j as u8; } +- +- for x in 0..(i+1)/2 { +- mask.swap(x, i - x); +- } +- +- self.flip_masks[i] = u8x16::load(&mask, 0); +- +- for j in 0..16 { self.s[j] = j as u8; } +- self.rotate_sisd(i); +- self.rotate_masks[i] = self.load_s(); +- } +- } +- fn rotate(&mut self, n: usize) { +- shuffle(self.load_s(), self.rotate_masks[n]).store(&mut self.s, 0) +- } +- +- fn load_s(&self) -> u8x16 { +- u8x16::load(&self.s, 0) +- } +- +- +- #[inline(never)] +- fn tk(&mut self, n: usize) { +- #[derive(Copy, Clone, Debug)] +- struct Perm { +- perm: u8x16, +- start: u8, +- odd: u16 +- } +- +- let mut perms = [Perm { perm: u8x16::splat(0), start: 0 , odd: 0 }; 60]; +- +- let mut i = 0; +- let mut c = [0_u8; 16]; +- let mut perm_max = 0; +- +- while i < n { +- while i < n && perm_max < 60 { +- self.rotate(i); +- if c[i] as usize >= i { +- c[i] = 0; +- i += 1; +- continue +- } +- +- c[i] += 1; +- i = 1; +- self.odd = !self.odd; +- if self.s[0] != 0 { +- if self.s[self.s[0] as usize] != 0 { +- perms[perm_max].perm = self.load_s(); +- perms[perm_max].start = self.s[0]; +- perms[perm_max].odd = self.odd; +- perm_max += 1; +- } else { +- if self.maxflips == 0 { self.maxflips = 1 } +- self.checksum += if self.odd != 0 { -1 } else { 1 }; +- } +- } +- } +- +- let mut k = 0; +- while k < std::cmp::max(1, perm_max) - 1 { +- let pk = &perms[k]; +- let pk1 = &perms[k + 1]; +- //println!("perm1 {:?}\nperm2 {:?}", pk.perm, pk1.perm); +- let mut perm1 = pk.perm; +- let mut perm2 = pk1.perm; +- +- let mut f1 = 0; +- let mut f2 = 0; +- let mut toterm1 = pk.start; +- let mut toterm2 = pk1.start; +- +- while toterm1 != 0 && toterm2 != 0 { +- perm1 = shuffle(perm1, self.flip_masks[toterm1 as 
usize]); +- perm2 = shuffle(perm2, self.flip_masks[toterm2 as usize]); +- toterm1 = perm1.extract(0); +- toterm2 = perm2.extract(0); +- +- f1 += 1; f2 += 1; +- } +- while toterm1 != 0 { +- perm1 = shuffle(perm1, self.flip_masks[toterm1 as usize]); +- toterm1 = perm1.extract(0); +- f1 += 1; +- } +- while toterm2 != 0 { +- perm2 = shuffle(perm2, self.flip_masks[toterm2 as usize]); +- toterm2 = perm2.extract(0); +- f2 += 1; +- } +- +- if f1 > self.maxflips { self.maxflips = f1 } +- if f2 > self.maxflips { self.maxflips = f2 } +- self.checksum += if pk.odd != 0 { -f1 } else { f1 }; +- self.checksum += if pk1.odd != 0 { -f2 } else { f2 }; +- +- k += 2; +- } +- while k < perm_max { +- let pk = &perms[k]; +- let mut perm = pk.perm; +- let mut f = 0; +- let mut toterm = pk.start; +- while toterm != 0 { +- perm = shuffle(perm, self.flip_masks[toterm as usize]); +- toterm = perm.extract(0); +- f += 1; +- } +- if f > self.maxflips { self.maxflips = f } +- self.checksum += if pk.odd != 0 { -f } else { f }; +- k += 1 +- } +- perm_max = 0; +- } +- } +-} +- +-fn main() { +- let mut state = State::new(); +- state.popmasks(); +- +- let args = env::args().collect::>(); +- if args.len() < 2 { +- println!("usage: {} number", args[0]); +- process::exit(1) +- } +- let max_n = args[1].parse().unwrap(); +- if max_n < 3 || max_n > 15 { +- println!("range: must be 3 <= n <= 14"); +- process::exit(1); +- } +- for i in 0..max_n { state.s[i] = i as u8 } +- state.tk(max_n); +- +- println!("{}\nPfannkuchen({}) = {}", state.checksum, max_n, state.maxflips); +-} +diff --git a/third_party/rust/simd/examples/mandelbrot.rs b/third_party/rust/simd/examples/mandelbrot.rs +deleted file mode 100755 +index c6f1320a0784..000000000000 +--- a/third_party/rust/simd/examples/mandelbrot.rs ++++ /dev/null +@@ -1,125 +0,0 @@ +-#![feature(iterator_step_by, test)] +- +-extern crate test; +-extern crate simd; +-use simd::{f32x4, u32x4}; +-use std::io::prelude::*; +- +-#[inline(never)] +-fn mandelbrot_naive(c_x: f32, 
c_y: f32, max_iter: u32) -> u32 { +- let mut x = c_x; +- let mut y = c_y; +- let mut count = 0; +- while count < max_iter { +- let xy = x * y; +- let xx = x * x; +- let yy = y * y; +- let sum = xx + yy; +- if sum > 4.0 { +- break +- } +- count += 1; +- x = xx - yy + c_x; +- y = xy * 2.0 + c_y; +- } +- count +-} +- +-#[inline(never)] +-fn mandelbrot_vector(c_x: f32x4, c_y: f32x4, max_iter: u32) -> u32x4 { +- let mut x = c_x; +- let mut y = c_y; +- +- let mut count = u32x4::splat(0); +- for _ in 0..max_iter as usize { +- let xy = x * y; +- let xx = x * x; +- let yy = y * y; +- let sum = xx + yy; +- let mask = sum.lt(f32x4::splat(4.0)); +- +- if !mask.any() { break } +- count = count + mask.to_i().select(u32x4::splat(1), +- u32x4::splat(0)); +- +- x = xx - yy + c_x; +- y = xy + xy + c_y; +- } +- count +-} +- +-const COLOURS: &'static [(f32, f32, f32)] = &[(0.0, 7.0, 100.0), +- (32.0, 107.0, 203.0), +- (237.0, 255.0, 255.0), +- (255.0, 170.0, 0.0), +- (0.0, 2.0, 0.0)]; +-const SCALE: f32 = 12.0; +-const LIMIT: u32 = 100; +- +-#[inline(never)] +-fn output_one(buf: &mut [u8], val: u32) { +- let (r, g, b); +- if val == LIMIT { +- r = 0; +- g = 0; +- b = 0; +- } else { +- let val = (val as f32 % SCALE) * (COLOURS.len() as f32) / SCALE; +- let left = val as usize % COLOURS.len(); +- let right = (left + 1) % COLOURS.len(); +- +- let p = val - left as f32; +- let (r1, g1, b1) = COLOURS[left]; +- let (r2, g2, b2) = COLOURS[right]; +- r = (r1 + (r2 - r1) * p) as u8; +- g = (g1 + (g2 - g1) * p) as u8; +- b = (b1 + (b2 - b1) * p) as u8; +- } +- buf[0] = r; +- buf[1] = g; +- buf[2] = b; +-} +- +-fn main() { +- let mut args = std::env::args(); +- args.next(); +- let width = args.next().unwrap().parse().unwrap(); +- let height = args.next().unwrap().parse().unwrap(); +- +- let left = -2.2; +- let right = left + 3.0; +- let top = 1.0; +- let bottom = top - 2.0; +- +- let width_step: f32 = (right - left) / width as f32; +- let height_step: f32 = (bottom - top) / height as f32; +- +- 
let adjust = f32x4::splat(width_step) * f32x4::new(0., 1., 2., 3.); +- +- println!("P6 {} {} 255", width, height); +- let mut line = vec![0; width * 3]; +- +- if args.next().is_none() { +- for i in 0..height { +- let y = f32x4::splat(top + height_step * i as f32); +- for j in (0..width).step_by(4) { +- let x = f32x4::splat(left + width_step * j as f32) + adjust; +- let ret = mandelbrot_vector(x, y, LIMIT); +- test::black_box(ret); +- for k in 0..4 { let val = ret.extract(k as u32); output_one(&mut line[3*(j + k)..3*(j + k + 1)], val); } +- } +- ::std::io::stdout().write(&line).unwrap(); +- } +- } else { +- for i in 0..height { +- let y = top + height_step * i as f32; +- for j in 0..width { +- let x = left + width_step * j as f32; +- let val = mandelbrot_naive(x, y, LIMIT); +- test::black_box(val); +- output_one(&mut line[3*j..3*(j + 1)], val); +- } +- ::std::io::stdout().write(&line).unwrap(); +- } +- } +-} +diff --git a/third_party/rust/simd/examples/matrix-inverse.rs b/third_party/rust/simd/examples/matrix-inverse.rs +deleted file mode 100644 +index e6eb7ffc4655..000000000000 +--- a/third_party/rust/simd/examples/matrix-inverse.rs ++++ /dev/null +@@ -1,281 +0,0 @@ +-extern crate simd; +-use simd::f32x4; +- +-fn mul(x: &[f32x4; 4], y: &[f32x4; 4]) -> [f32x4; 4] { +- let y0 = y[0]; +- let y1 = y[1]; +- let y2 = y[2]; +- let y3 = y[3]; +- [f32x4::splat(y0.extract(0)) * x[0] + +- f32x4::splat(y0.extract(1)) * x[1] + +- f32x4::splat(y0.extract(2)) * x[2] + +- f32x4::splat(y0.extract(3)) * x[3], +- f32x4::splat(y1.extract(0)) * x[0] + +- f32x4::splat(y1.extract(1)) * x[1] + +- f32x4::splat(y1.extract(2)) * x[2] + +- f32x4::splat(y1.extract(3)) * x[3], +- f32x4::splat(y2.extract(0)) * x[0] + +- f32x4::splat(y2.extract(1)) * x[1] + +- f32x4::splat(y2.extract(2)) * x[2] + +- f32x4::splat(y2.extract(3)) * x[3], +- f32x4::splat(y3.extract(0)) * x[0] + +- f32x4::splat(y3.extract(1)) * x[1] + +- f32x4::splat(y3.extract(2)) * x[2] + +- f32x4::splat(y3.extract(3)) * x[3], +- ] 
+-} +- +-#[allow(dead_code)] +-fn inverse_naive(x: &[[f32; 4]; 4]) -> [[f32; 4]; 4] { +- let mut t = [[0_f32; 4]; 4]; +- for i in 0..4 { +- t[0][i] = x[i][0]; +- t[1][i] = x[i][1]; +- t[2][i] = x[i][2]; +- t[3][i] = x[i][3]; +- } +- println!("{:?}", t); +- +- let _0 = t[2][2] * t[3][3]; +- let _1 = t[2][3] * t[3][2]; +- let _2 = t[2][1] * t[3][3]; +- let _3 = t[2][3] * t[3][1]; +- let _4 = t[2][1] * t[3][2]; +- let _5 = t[2][2] * t[3][1]; +- let _6 = t[2][0] * t[3][3]; +- let _7 = t[2][3] * t[3][0]; +- let _8 = t[2][0] * t[3][2]; +- let _9 = t[2][2] * t[3][0]; +- let _10 = t[2][0] * t[3][1]; +- let _11 = t[2][1] * t[3][0]; +- let v = [_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11]; +- println!("{:?}", v); +- +- let d00 = _0 * t[1][1] + _3 * t[1][2] + _4 * t[1][3] - +- (_1 * t[1][1] + _2 * t[1][2] + _5 * t[1][3]); +- let d01 = _1 * t[1][0] + _6 * t[1][2] + _9 * t[1][3] - +- (_0 * t[1][0] + _7 * t[1][2] + _8 * t[1][3]); +- let d02 = _2 * t[1][0] + _7 * t[1][1] + _10 * t[1][3] - +- (_3 * t[1][0] + _6 * t[1][1] + _11 * t[1][3]); +- let d03 = _5 * t[1][0] + _8 * t[1][1] + _11 * t[1][2] - +- (_4 * t[1][0] + _9 * t[1][1] + _10 * t[1][2]); +- let d10 = _1 * t[0][1] + _2 * t[0][2] + _5 * t[0][3] - +- (_0 * t[0][1] + _3 * t[0][2] + _4 * t[0][3]); +- let d11 = _0 * t[0][0] + _7 * t[0][2] + _8 * t[0][3] - +- (_1 * t[0][0] + _6 * t[0][2] + _9 * t[0][3]); +- let d12 = _3 * t[0][0] + _6 * t[0][1] + _11 * t[0][3] - +- (_2 * t[0][0] + _7 * t[0][1] + _10 * t[0][3]); +- let d13 = _4 * t[0][0] + _9 * t[0][1] + _10 * t[0][2] - +- (_5 * t[0][0] + _8 * t[0][1] + _11 * t[0][2]); +- +- println!("{:?}", [d00, d01, d02, d03, d10, d11, d12, d13]); +- +- let _0 = t[0][2] * t[1][3]; +- let _1 = t[0][3] * t[1][2]; +- let _2 = t[0][1] * t[1][3]; +- let _3 = t[0][3] * t[1][1]; +- let _4 = t[0][1] * t[1][2]; +- let _5 = t[0][2] * t[1][1]; +- let _6 = t[0][0] * t[1][3]; +- let _7 = t[0][3] * t[1][0]; +- let _8 = t[0][0] * t[1][2]; +- let _9 = t[0][2] * t[1][0]; +- let _10 = t[0][0] * t[1][1]; +- 
let _11 = t[0][1] * t[1][0]; +- let v = [_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11]; +- println!("{:?}", v); +- +- let d20 = _0*t[3][1] + _3*t[3][2] + _4*t[3][3]- +- (_1*t[3][1] + _2*t[3][2] + _5*t[3][3]); +- let d21 = _1*t[3][0] + _6*t[3][2] + _9*t[3][3]- +- (_0*t[3][0] + _7*t[3][2] + _8*t[3][3]); +- let d22 = _2*t[3][0] + _7*t[3][1] + _10*t[3][3]- +- (_3*t[3][0] + _6*t[3][1] + _11*t[3][3]); +- let d23 = _5*t[3][0] + _8*t[3][1] + _11*t[3][2]- +- (_4*t[3][0] + _9*t[3][1] + _10*t[3][2]); +- let d30 = _2*t[2][2] + _5*t[2][3] + _1*t[2][1]- +- (_4*t[2][3] + _0*t[2][1] + _3*t[2][2]); +- let d31 = _8*t[2][3] + _0*t[2][0] + _7*t[2][2]- +- (_6*t[2][2] + _9*t[2][3] + _1*t[2][0]); +- let d32 = _6*t[2][1] + _11*t[2][3] + _3*t[2][0]- +- (_10*t[2][3] + _2*t[2][0] + _7*t[2][1]); +- let d33 = _10*t[2][2] + _4*t[2][0] + _9*t[2][1]- +- (_8*t[2][1] + _11*t[2][2] + _5*t[2][0]); +- +- println!("{:?}", [d20, d21, d22, d23, d30, d31, d32, d33]); +- +- let det = t[0][0] * d00 + t[0][1] * d01 + t[0][2] * d02 + t[0][3] * d03; +- +- let det = 1.0 / det; +- let mut ret = [[d00, d01, d02, d03], +- [d10, d11, d12, d13], +- [d20, d21, d22, d23], +- [d30, d31, d32, d33]]; +- for i in 0..4 { +- for j in 0..4 { +- ret[i][j] *= det; +- } +- } +- ret +-} +- +-fn inverse_simd4(x: &[f32x4; 4]) -> [f32x4; 4] { +- let src0 = x[0]; +- let src1 = x[1]; +- let src2 = x[2]; +- let src3 = x[3]; +- +- let tmp1 = f32x4::new(src0.extract(0), src0.extract(1), +- src1.extract(4 - 4), src1.extract(5 - 4)); +- let row1 = f32x4::new(src2.extract(0), src2.extract(1), +- src3.extract(4 - 4), src3.extract(5 - 4)); +- let row0 = f32x4::new(tmp1.extract(0), tmp1.extract(2), +- row1.extract(4 - 4), row1.extract(6 - 4)); +- let row1 = f32x4::new(row1.extract(1), row1.extract(3), +- tmp1.extract(5 - 4), tmp1.extract(7 - 4)); +- +- let tmp1 = f32x4::new(src0.extract(2), src0.extract(3), +- src1.extract(6 - 4), src1.extract(7 - 4)); +- let row3 = f32x4::new(src2.extract(2), src2.extract(3), +- src3.extract(6 - 4), 
src3.extract(7 - 4)); +- let row2 = f32x4::new(tmp1.extract(0), tmp1.extract(2), +- row3.extract(4 - 4), row3.extract(6 - 4)); +- let row3 = f32x4::new(row3.extract(1), row3.extract(3), +- tmp1.extract(5 - 4), tmp1.extract(7 - 4)); +- +- +- let tmp1 = row2 * row3; +- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), +- tmp1.extract(3), tmp1.extract(2)); +- let minor0 = row1 * tmp1; +- let minor1 = row0 * tmp1; +- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), +- tmp1.extract(0), tmp1.extract(1)); +- let minor0 = (row1 * tmp1) - minor0; +- let minor1 = (row0 * tmp1) - minor1; +- let minor1 = f32x4::new(minor1.extract(2), minor1.extract(3), +- minor1.extract(0), minor1.extract(1)); +- //println!("{:?}", minor1); +- +- +- let tmp1 = row1 * row2; +- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), +- tmp1.extract(3), tmp1.extract(2)); +- let minor0 = (row3 * tmp1) + minor0; +- let minor3 = row0 * tmp1; +- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), +- tmp1.extract(0), tmp1.extract(1)); +- +- let minor0 = minor0 - row3 * tmp1; +- let minor3 = row0 * tmp1 - minor3; +- let minor3 = f32x4::new(minor3.extract(2), minor3.extract(3), +- minor3.extract(0), minor3.extract(1)); +- //println!("{:?}", minor1); +- +- +- let tmp1 = row3 * f32x4::new(row1.extract(2), row1.extract(3), +- row1.extract(0), row1.extract(1)); +- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), +- tmp1.extract(3), tmp1.extract(2)); +- let row2 = f32x4::new(row2.extract(2), row2.extract(3), +- row2.extract(0), row2.extract(1)); +- let minor0 = row2 * tmp1 + minor0; +- let minor2 = row0 * tmp1; +- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), +- tmp1.extract(0), tmp1.extract(1)); +- let minor0 = minor0 - row2 * tmp1; +- let minor2 = row0 * tmp1 - minor2; +- let minor2 = f32x4::new(minor2.extract(2), minor2.extract(3), +- minor2.extract(0), minor2.extract(1)); +- //println!("{:?}", minor1); +- +- +- let tmp1 = row0 * row1; +- let tmp1 = 
f32x4::new(tmp1.extract(1), tmp1.extract(0), +- tmp1.extract(3), tmp1.extract(2)); +- let minor2 = minor2 + row3 * tmp1; +- let minor3 = row2 * tmp1 - minor3; +- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), +- tmp1.extract(0), tmp1.extract(1)); +- let minor2 = row3 * tmp1 - minor2; +- let minor3 = minor3 - row2 * tmp1; +- //println!("{:?}", minor1); +- +- +- +- let tmp1 = row0 * row3; +- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), +- tmp1.extract(3), tmp1.extract(2)); +- let minor1 = minor1 - row2 * tmp1; +- let minor2 = row1 * tmp1 + minor2; +- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), +- tmp1.extract(0), tmp1.extract(1)); +- let minor1 = row2 * tmp1 + minor1; +- let minor2 = minor2 - row1 * tmp1; +- //println!("{:?}", minor1); +- +- let tmp1 = row0 * row2; +- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), +- tmp1.extract(3), tmp1.extract(2)); +- let minor1 = row3 * tmp1 + minor1; +- let minor3 = minor3 - row1 * tmp1; +- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), +- tmp1.extract(0), tmp1.extract(1)); +- let minor1 = minor1 - row3 * tmp1; +- let minor3 = row1 * tmp1 + minor3; +- //println!("{:?}", minor1); +- +- let det = row0 * minor0; +- let det = f32x4::new(det.extract(2), det.extract(3), +- det.extract(0), det.extract(1)) + det; +- let det = f32x4::new(det.extract(1), det.extract(0), +- det.extract(3), det.extract(2)) + det; +- let tmp1 = det.approx_reciprocal(); +- let det = tmp1 + tmp1 - det * tmp1 * tmp1; +- +-// let det = f32x4::splat(det.extract(0)); +- +- [minor0 * det, minor1 * det, minor2 * det, minor3 * det] +-} +- +-fn p(x: &[f32x4; 4]) { +- for xx in x { +- for i in 0..4 { +- let v = xx.extract(i); +- if v == 0.0 { +- print!("{}{:6.2}", if i > 0 {", "} else {"|"}, ""); +- } else { +- print!("{}{:6.2}", if i > 0 {", "} else {"|"}, xx.extract(i)); +- } +- } +- println!(" |"); +- } +-} +- +-fn main() { +- let x = [f32x4::new(-100.0, 6.0, 100.0, 1.0), +- f32x4::new(3.0, 1.0, 0.0, 1.0), +- 
f32x4::new(2.0, 1.0, 1.0, 1.0), +- f32x4::new(-10.0, 1.0, 1.0, 1.0)]; +- +- /* let mut x_ = [[0.0; 4]; 4]; +- for i in 0..4 { +- for j in 0..4 { +- x_[i][j] = x[i].extract(j as u32) +- } +- } +- +- let ret = inverse_naive(&x_); +- let mut y = [f32x4::splat(0.0); 4]; +- for i in 0..4 { +- for j in 0..4 { +- y[i] = y[i].replace(j as u32, ret[i][j]) +- } +-}*/ +- let y = inverse_simd4(&x); +- p(&x); +- println!(""); +- p(&y); +- println!(""); +- p(&mul(&x, &y)) +-} +diff --git a/third_party/rust/simd/examples/nbody-nosimd.rs b/third_party/rust/simd/examples/nbody-nosimd.rs +deleted file mode 100644 +index d5f1bb422ff2..000000000000 +--- a/third_party/rust/simd/examples/nbody-nosimd.rs ++++ /dev/null +@@ -1,156 +0,0 @@ +-// The Computer Language Benchmarks Game +-// http://benchmarksgame.alioth.debian.org/ +-// +-// contributed by the Rust Project Developers +-// contributed by TeXitoi +- +-const PI: f64 = 3.141592653589793; +-const SOLAR_MASS: f64 = 4.0 * PI * PI; +-const YEAR: f64 = 365.24; +-const N_BODIES: usize = 5; +- +-static BODIES: [Planet;N_BODIES] = [ +- // Sun +- Planet { +- x: 0.0, y: 0.0, z: 0.0, +- vx: 0.0, vy: 0.0, vz: 0.0, +- mass: SOLAR_MASS, +- }, +- // Jupiter +- Planet { +- x: 4.84143144246472090e+00, +- y: -1.16032004402742839e+00, +- z: -1.03622044471123109e-01, +- vx: 1.66007664274403694e-03 * YEAR, +- vy: 7.69901118419740425e-03 * YEAR, +- vz: -6.90460016972063023e-05 * YEAR, +- mass: 9.54791938424326609e-04 * SOLAR_MASS, +- }, +- // Saturn +- Planet { +- x: 8.34336671824457987e+00, +- y: 4.12479856412430479e+00, +- z: -4.03523417114321381e-01, +- vx: -2.76742510726862411e-03 * YEAR, +- vy: 4.99852801234917238e-03 * YEAR, +- vz: 2.30417297573763929e-05 * YEAR, +- mass: 2.85885980666130812e-04 * SOLAR_MASS, +- }, +- // Uranus +- Planet { +- x: 1.28943695621391310e+01, +- y: -1.51111514016986312e+01, +- z: -2.23307578892655734e-01, +- vx: 2.96460137564761618e-03 * YEAR, +- vy: 2.37847173959480950e-03 * YEAR, +- vz: -2.96589568540237556e-05 * 
YEAR, +- mass: 4.36624404335156298e-05 * SOLAR_MASS, +- }, +- // Neptune +- Planet { +- x: 1.53796971148509165e+01, +- y: -2.59193146099879641e+01, +- z: 1.79258772950371181e-01, +- vx: 2.68067772490389322e-03 * YEAR, +- vy: 1.62824170038242295e-03 * YEAR, +- vz: -9.51592254519715870e-05 * YEAR, +- mass: 5.15138902046611451e-05 * SOLAR_MASS, +- }, +-]; +- +-#[derive(Clone, Copy)] +-struct Planet { +- x: f64, y: f64, z: f64, +- vx: f64, vy: f64, vz: f64, +- mass: f64, +-} +- +-fn advance(bodies: &mut [Planet;N_BODIES], dt: f64, steps: i32) { +- for _ in 0..steps { +- let mut b_slice: &mut [_] = bodies; +- loop { +- let bi = match shift_mut_ref(&mut b_slice) { +- Some(bi) => bi, +- None => break +- }; +- for bj in b_slice.iter_mut() { +- let dx = bi.x - bj.x; +- let dy = bi.y - bj.y; +- let dz = bi.z - bj.z; +- +- let d2 = dx * dx + dy * dy + dz * dz; +- let mag = dt / (d2 * d2.sqrt()); +- +- let massj_mag = bj.mass * mag; +- bi.vx -= dx * massj_mag; +- bi.vy -= dy * massj_mag; +- bi.vz -= dz * massj_mag; +- +- let massi_mag = bi.mass * mag; +- bj.vx += dx * massi_mag; +- bj.vy += dy * massi_mag; +- bj.vz += dz * massi_mag; +- } +- bi.x += dt * bi.vx; +- bi.y += dt * bi.vy; +- bi.z += dt * bi.vz; +- } +- } +-} +- +-fn energy(bodies: &[Planet;N_BODIES]) -> f64 { +- let mut e = 0.0; +- let mut bodies = bodies.iter(); +- loop { +- let bi = match bodies.next() { +- Some(bi) => bi, +- None => break +- }; +- e += (bi.vx * bi.vx + bi.vy * bi.vy + bi.vz * bi.vz) * bi.mass / 2.0; +- for bj in bodies.clone() { +- let dx = bi.x - bj.x; +- let dy = bi.y - bj.y; +- let dz = bi.z - bj.z; +- let dist = (dx * dx + dy * dy + dz * dz).sqrt(); +- e -= bi.mass * bj.mass / dist; +- } +- } +- e +-} +- +-fn offset_momentum(bodies: &mut [Planet;N_BODIES]) { +- let mut px = 0.0; +- let mut py = 0.0; +- let mut pz = 0.0; +- for bi in bodies.iter() { +- px += bi.vx * bi.mass; +- py += bi.vy * bi.mass; +- pz += bi.vz * bi.mass; +- } +- let sun = &mut bodies[0]; +- sun.vx = - px / SOLAR_MASS; +- 
sun.vy = - py / SOLAR_MASS; +- sun.vz = - pz / SOLAR_MASS; +-} +- +-fn main() { +- let n = std::env::args().nth(1).expect("need one arg").parse().unwrap(); +- let mut bodies = BODIES; +- +- offset_momentum(&mut bodies); +- println!("{:.9}", energy(&bodies)); +- +- advance(&mut bodies, 0.01, n); +- +- println!("{:.9}", energy(&bodies)); +-} +- +-/// Pop a mutable reference off the head of a slice, mutating the slice to no +-/// longer contain the mutable reference. +-fn shift_mut_ref<'a, T>(r: &mut &'a mut [T]) -> Option<&'a mut T> { +- if r.len() == 0 { return None } +- let tmp = std::mem::replace(r, &mut []); +- let (h, t) = tmp.split_at_mut(1); +- *r = t; +- Some(&mut h[0]) +-} +diff --git a/third_party/rust/simd/examples/nbody.rs b/third_party/rust/simd/examples/nbody.rs +deleted file mode 100755 +index d6d4e88e3741..000000000000 +--- a/third_party/rust/simd/examples/nbody.rs ++++ /dev/null +@@ -1,170 +0,0 @@ +-#![feature(cfg_target_feature)] +- +-extern crate simd; +- +-#[cfg(target_feature = "sse2")] +-use simd::x86::sse2::*; +-#[cfg(target_arch = "aarch64")] +-use simd::aarch64::neon::*; +- +-const PI: f64 = 3.141592653589793; +-const SOLAR_MASS: f64 = 4.0 * PI * PI; +-const DAYS_PER_YEAR: f64 = 365.24; +- +-struct Body { +- x: [f64; 3], +- _fill: f64, +- v: [f64; 3], +- mass: f64, +-} +- +-impl Body { +- fn new(x0: f64, x1: f64, x2: f64, +- v0: f64, v1: f64, v2: f64, +- mass: f64) -> Body { +- Body { +- x: [x0, x1, x2], +- _fill: 0.0, +- v: [v0, v1, v2], +- mass: mass, +- } +- } +-} +- +-const N_BODIES: usize = 5; +-const N: usize = N_BODIES * (N_BODIES - 1) / 2; +-fn offset_momentum(bodies: &mut [Body; N_BODIES]) { +- let (sun, rest) = bodies.split_at_mut(1); +- let sun = &mut sun[0]; +- for body in rest { +- for k in 0..3 { +- sun.v[k] -= body.v[k] * body.mass / SOLAR_MASS; +- } +- } +-} +-fn advance(bodies: &mut [Body; N_BODIES], dt: f64) { +- let mut r = [[0.0; 4]; N]; +- let mut mag = [0.0; N]; +- +- let mut dx = [f64x2::splat(0.0); 3]; +- let mut 
dsquared; +- let mut distance; +- let mut dmag; +- +- let mut i = 0; +- for j in 0..N_BODIES { +- for k in j+1..N_BODIES { +- for m in 0..3 { +- r[i][m] = bodies[j].x[m] - bodies[k].x[m]; +- } +- i += 1; +- } +- } +- +- i = 0; +- while i < N { +- for m in 0..3 { +- dx[m] = f64x2::new(r[i][m], r[i+1][m]); +- } +- +- dsquared = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; +- distance = dsquared.to_f32().approx_rsqrt().to_f64(); +- for _ in 0..2 { +- distance = distance * f64x2::splat(1.5) - +- ((f64x2::splat(0.5) * dsquared) * distance) * (distance * distance) +- } +- dmag = f64x2::splat(dt) / dsquared * distance; +- dmag.store(&mut mag, i); +- +- i += 2; +- } +- +- i = 0; +- for j in 0..N_BODIES { +- for k in j+1..N_BODIES { +- for m in 0..3 { +- bodies[j].v[m] -= r[i][m] * bodies[k].mass * mag[i]; +- bodies[k].v[m] += r[i][m] * bodies[j].mass * mag[i]; +- } +- i += 1 +- } +- } +- for body in bodies { +- for m in 0..3 { +- body.x[m] += dt * body.v[m] +- } +- } +-} +- +-fn energy(bodies: &[Body; N_BODIES]) -> f64 { +- let mut e = 0.0; +- for i in 0..N_BODIES { +- let bi = &bodies[i]; +- e += bi.mass * (bi.v[0] * bi.v[0] + bi.v[1] * bi.v[1] + bi.v[2] * bi.v[2]) / 2.0; +- for j in i+1..N_BODIES { +- let bj = &bodies[j]; +- let mut dx = [0.0; 3]; +- for k in 0..3 { +- dx[k] = bi.x[k] - bj.x[k]; +- } +- let mut distance = 0.0; +- for &d in &dx { distance += d * d } +- e -= bi.mass * bj.mass / distance.sqrt() +- } +- } +- e +-} +- +-fn main() { +- let mut bodies: [Body; N_BODIES] = [ +- /* sun */ +- Body::new(0.0, 0.0, 0.0, +- 0.0, 0.0, 0.0, +- SOLAR_MASS), +- /* jupiter */ +- Body::new(4.84143144246472090e+00, +- -1.16032004402742839e+00, +- -1.03622044471123109e-01 , +- 1.66007664274403694e-03 * DAYS_PER_YEAR, +- 7.69901118419740425e-03 * DAYS_PER_YEAR, +- -6.90460016972063023e-05 * DAYS_PER_YEAR , +- 9.54791938424326609e-04 * SOLAR_MASS +- ), +- /* saturn */ +- Body::new(8.34336671824457987e+00, +- 4.12479856412430479e+00, +- -4.03523417114321381e-01 , +- 
-2.76742510726862411e-03 * DAYS_PER_YEAR, +- 4.99852801234917238e-03 * DAYS_PER_YEAR, +- 2.30417297573763929e-05 * DAYS_PER_YEAR , +- 2.85885980666130812e-04 * SOLAR_MASS +- ), +- /* uranus */ +- Body::new(1.28943695621391310e+01, +- -1.51111514016986312e+01, +- -2.23307578892655734e-01 , +- 2.96460137564761618e-03 * DAYS_PER_YEAR, +- 2.37847173959480950e-03 * DAYS_PER_YEAR, +- -2.96589568540237556e-05 * DAYS_PER_YEAR , +- 4.36624404335156298e-05 * SOLAR_MASS +- ), +- /* neptune */ +- Body::new(1.53796971148509165e+01, +- -2.59193146099879641e+01, +- 1.79258772950371181e-01 , +- 2.68067772490389322e-03 * DAYS_PER_YEAR, +- 1.62824170038242295e-03 * DAYS_PER_YEAR, +- -9.51592254519715870e-05 * DAYS_PER_YEAR , +- 5.15138902046611451e-05 * SOLAR_MASS +- ) +- ]; +- +- let n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap(); +- +- offset_momentum(&mut bodies); +- println!("{:.9}", energy(&bodies)); +- for _ in 0..n { +- advance(&mut bodies, 0.01); +- } +- println!("{:.9}", energy(&bodies)); +-} +diff --git a/third_party/rust/simd/examples/ops.rs b/third_party/rust/simd/examples/ops.rs +deleted file mode 100644 +index f8c919101e3c..000000000000 +--- a/third_party/rust/simd/examples/ops.rs ++++ /dev/null +@@ -1,10 +0,0 @@ +-extern crate simd; +- +-use simd::*; +- +-#[allow(unused_variables)] +-fn main() { +- let x = i32x4::splat(1_i32); +- let y = -x; +- let z = !x; +-} +diff --git a/third_party/rust/simd/examples/spectral-norm-nosimd.rs b/third_party/rust/simd/examples/spectral-norm-nosimd.rs +deleted file mode 100644 +index 919f9c61990f..000000000000 +--- a/third_party/rust/simd/examples/spectral-norm-nosimd.rs ++++ /dev/null +@@ -1,106 +0,0 @@ +-// The Computer Language Benchmarks Game +-// http://benchmarksgame.alioth.debian.org/ +-// +-// contributed by the Rust Project Developers +-// contributed by TeXitoi +- +-#![allow(non_snake_case)] +- +-use std::iter::repeat; +-//use std::thread; +- +-// As std::simd::f64x2 is unstable, we provide a 
similar interface, +-// expecting llvm to autovectorize its usage. +-#[allow(non_camel_case_types)] +-struct f64x2(f64, f64); +-impl std::ops::Add for f64x2 { +- type Output = Self; +- fn add(self, rhs: Self) -> Self { +- f64x2(self.0 + rhs.0, self.1 + rhs.1) +- } +-} +-impl std::ops::Div for f64x2 { +- type Output = Self; +- fn div(self, rhs: Self) -> Self { +- f64x2(self.0 / rhs.0, self.1 / rhs.1) +- } +-} +- +-fn main() { +- let n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap(); +- let answer = spectralnorm(n); +- println!("{:.9}", answer); +-} +- +-fn spectralnorm(n: usize) -> f64 { +- assert!(n % 2 == 0, "only even lengths are accepted"); +- let mut u = repeat(1.0).take(n).collect::>(); +- let mut v = u.clone(); +- let mut tmp = v.clone(); +- for _ in 0..10 { +- mult_AtAv(&u, &mut v, &mut tmp); +- mult_AtAv(&v, &mut u, &mut tmp); +- } +- (dot(&u, &v) / dot(&v, &v)).sqrt() +-} +- +-fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) { +- mult_Av(v, tmp); +- mult_Atv(tmp, out); +-} +- +-fn mult_Av(v: &[f64], out: &mut [f64]) { +- parallel(out, |start, out| mult(v, out, start, |i, j| A(i, j))); +-} +- +-fn mult_Atv(v: &[f64], out: &mut [f64]) { +- parallel(out, |start, out| mult(v, out, start, |i, j| A(j, i))); +-} +- +-fn mult(v: &[f64], out: &mut [f64], start: usize, a: F) +- where F: Fn(usize, usize) -> f64 { +- for (i, slot) in out.iter_mut().enumerate().map(|(i, s)| (i + start, s)) { +- let mut sum = f64x2(0.0, 0.0); +- for (j, chunk) in v.chunks(2).enumerate().map(|(j, s)| (2 * j, s)) { +- let top = f64x2(chunk[0], chunk[1]); +- let bot = f64x2(a(i, j), a(i, j + 1)); +- sum = sum + top / bot; +- } +- let f64x2(a, b) = sum; +- *slot = a + b; +- } +-} +- +-fn A(i: usize, j: usize) -> f64 { +- ((i + j) * (i + j + 1) / 2 + i + 1) as f64 +-} +- +-fn dot(v: &[f64], u: &[f64]) -> f64 { +- v.iter().zip(u.iter()).map(|(a, b)| *a * *b).fold(0., |acc, i| acc + i) +-} +- +-//struct Racy(T); +-//unsafe impl Send for Racy {} +- +-// 
Executes a closure in parallel over the given mutable slice. The closure `f` +-// is run in parallel and yielded the starting index within `v` as well as a +-// sub-slice of `v`. +-fn parallel<'a, T, F>(v: &mut [T], ref f: F) +- where T: 'static + Send + Sync, +-F: Fn(usize, &mut [T]) + Sync +-{ +- f(0, v); +- /*let size = v.len() / 4 + 1; +- let jhs = v.chunks_mut(size).enumerate().map(|(i, chunk)| { +- // Need to convert `f` and `chunk` to something that can cross the task +- // boundary. +- let f = Racy(f as *const F as *const usize); +- let raw = Racy((&mut chunk[0] as *mut T, chunk.len())); +- thread::spawn(move|| { +- let f = f.0 as *const F; +- let raw = raw.0; +- unsafe { (*f)(i * size, std::slice::from_raw_parts_mut(raw.0, raw.1)) } +- }) +- }).collect::>(); +- for jh in jhs { jh.join().unwrap(); }*/ +-} +diff --git a/third_party/rust/simd/examples/spectral-norm.rs b/third_party/rust/simd/examples/spectral-norm.rs +deleted file mode 100755 +index 656f52e4fad0..000000000000 +--- a/third_party/rust/simd/examples/spectral-norm.rs ++++ /dev/null +@@ -1,74 +0,0 @@ +-#![feature(cfg_target_feature)] +-#![allow(non_snake_case)] +- +-extern crate simd; +- +-#[cfg(target_feature = "sse2")] +-use simd::x86::sse2::f64x2; +-#[cfg(target_arch = "aarch64")] +-use simd::aarch64::neon::f64x2; +- +-fn A(i: usize, j: usize) -> f64 { +- ((i + j) * (i + j + 1) / 2 + i + 1) as f64 +-} +- +-fn dot(x: &[f64], y: &[f64]) -> f64 { +- x.iter().zip(y).map(|(&x, &y)| x * y).fold(0.0, |a, b| a + b) +-} +- +-fn mult_Av(v: &[f64], out: &mut [f64]) { +- assert!(v.len() == out.len()); +- assert!(v.len() % 2 == 0); +- +- for i in 0..v.len() { +- let mut sum = f64x2::splat(0.0); +- +- let mut j = 0; +- while j < v.len() { +- let b = f64x2::load(v, j); +- let a = f64x2::new(A(i, j), A(i, j + 1)); +- sum = sum + b / a; +- j += 2 +- } +- out[i] = sum.extract(0) + sum.extract(1); +- } +-} +- +-fn mult_Atv(v: &[f64], out: &mut [f64]) { +- assert!(v.len() == out.len()); +- assert!(v.len() % 2 == 
0); +- +- for i in 0..v.len() { +- let mut sum = f64x2::splat(0.0); +- +- let mut j = 0; +- while j < v.len() { +- let b = f64x2::load(v, j); +- let a = f64x2::new(A(j, i), A(j + 1, i)); +- sum = sum + b / a; +- j += 2 +- } +- out[i] = sum.extract(0) + sum.extract(1); +- } +-} +- +-fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) { +- mult_Av(v, tmp); +- mult_Atv(tmp, out); +-} +- +-fn main() { +- let mut n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap(); +- if n % 2 == 1 { n += 1 } +- +- let mut u = vec![1.0; n]; +- let mut v = u.clone(); +- let mut tmp = u.clone(); +- +- for _ in 0..10 { +- mult_AtAv(&u, &mut v, &mut tmp); +- mult_AtAv(&v, &mut u, &mut tmp); +- } +- +- println!("{:.9}", (dot(&u, &v) / dot(&v, &v)).sqrt()); +-} +diff --git a/third_party/rust/simd/src/aarch64/mod.rs b/third_party/rust/simd/src/aarch64/mod.rs +deleted file mode 100644 +index 5ba0a302b4d1..000000000000 +--- a/third_party/rust/simd/src/aarch64/mod.rs ++++ /dev/null +@@ -1,3 +0,0 @@ +-//! Features specific to AArch64 CPUs. 
+- +-pub mod neon; +diff --git a/third_party/rust/simd/src/aarch64/neon.rs b/third_party/rust/simd/src/aarch64/neon.rs +deleted file mode 100644 +index 0cca05a52788..000000000000 +--- a/third_party/rust/simd/src/aarch64/neon.rs ++++ /dev/null +@@ -1,681 +0,0 @@ +-use super::super::*; +-use {simd_cast, f32x2}; +- +-pub use sixty_four::{f64x2, i64x2, u64x2, bool64ix2, bool64fx2}; +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct u32x2(u32, u32); +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct i32x2(i32, i32); +- +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct u16x4(u16, u16, u16, u16); +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct i16x4(i16, i16, i16, i16); +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct u8x8(u8, u8, u8, u8, +- u8, u8, u8, u8); +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct i8x8(i8, i8, i8, i8, +- i8, i8, i8, i8); +- +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct i64x1(i64); +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct u64x1(u64); +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct f64x1(f64); +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn aarch64_vhadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vhadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vhadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vhadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vhadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vhadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vhaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vhaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vhaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vhaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vhaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vhaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vrhadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vrhadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vrhadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vrhadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn 
aarch64_vrhadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vrhadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vrhaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vrhaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vrhaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vrhaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vrhaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vrhaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vqadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vqadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vqadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vqadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vqadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vqadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vqadd_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vqadd_u64(x: u64x1, y: u64x1) -> u64x1; +- fn aarch64_vqaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vqaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vqaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vqaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vqaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vqaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vqaddq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vqaddq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vuqadd_s8(x: i8x16, y: u8x16) -> i8x16; +- fn aarch64_vuqadd_s16(x: i16x8, y: u16x8) -> i16x8; +- fn aarch64_vuqadd_s32(x: i32x4, y: u32x4) -> i32x4; +- fn aarch64_vuqadd_s64(x: i64x2, y: u64x2) -> i64x2; +- fn aarch64_vsqadd_u8(x: u8x16, y: i8x16) -> u8x16; +- fn aarch64_vsqadd_u16(x: u16x8, y: i16x8) -> u16x8; +- fn aarch64_vsqadd_u32(x: u32x4, y: i32x4) -> u32x4; +- fn aarch64_vsqadd_u64(x: u64x2, y: i64x2) -> u64x2; +- fn aarch64_vraddhn_s16(x: i16x8, y: i16x8) -> i8x8; +- fn aarch64_vraddhn_u16(x: u16x8, y: u16x8) -> u8x8; +- fn aarch64_vraddhn_s32(x: i32x4, y: i32x4) -> i16x4; +- fn aarch64_vraddhn_u32(x: u32x4, y: u32x4) -> u16x4; +- fn aarch64_vraddhn_s64(x: i64x2, y: 
i64x2) -> i32x2; +- fn aarch64_vraddhn_u64(x: u64x2, y: u64x2) -> u32x2; +- fn aarch64_vfmulx_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vfmulx_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vfmulxq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vfmulxq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vfma_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vfma_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vfmaq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vfmaq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vqdmulh_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vqdmulh_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vqdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vqdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vqrdmulh_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vqrdmulh_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vqrdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vqrdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vmull_s8(x: i8x8, y: i8x8) -> i16x8; +- fn aarch64_vmull_u8(x: u8x8, y: u8x8) -> u16x8; +- fn aarch64_vmull_s16(x: i16x4, y: i16x4) -> i32x4; +- fn aarch64_vmull_u16(x: u16x4, y: u16x4) -> u32x4; +- fn aarch64_vmull_s32(x: i32x2, y: i32x2) -> i64x2; +- fn aarch64_vmull_u32(x: u32x2, y: u32x2) -> u64x2; +- fn aarch64_vqdmullq_s8(x: i8x8, y: i8x8) -> i16x8; +- fn aarch64_vqdmullq_s16(x: i16x4, y: i16x4) -> i32x4; +- fn aarch64_vhsub_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vhsub_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vhsub_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vhsub_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vhsub_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vhsub_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vhsubq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vhsubq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vhsubq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vhsubq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vhsubq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn 
aarch64_vhsubq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vqsub_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vqsub_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vqsub_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vqsub_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vqsub_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vqsub_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vqsub_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vqsub_u64(x: u64x1, y: u64x1) -> u64x1; +- fn aarch64_vqsubq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vqsubq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vqsubq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vqsubq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vqsubq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vqsubq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vqsubq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vqsubq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vrsubhn_s16(x: i16x8, y: i16x8) -> i8x8; +- fn aarch64_vrsubhn_u16(x: u16x8, y: u16x8) -> u8x8; +- fn aarch64_vrsubhn_s32(x: i32x4, y: i32x4) -> i16x4; +- fn aarch64_vrsubhn_u32(x: u32x4, y: u32x4) -> u16x4; +- fn aarch64_vrsubhn_s64(x: i64x2, y: i64x2) -> i32x2; +- fn aarch64_vrsubhn_u64(x: u64x2, y: u64x2) -> u32x2; +- fn aarch64_vabd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vabd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vabd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vabd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vabd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vabd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vabd_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vabd_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vabdq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vabdq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vabdq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vabdq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vabdq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vabdq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn 
aarch64_vabdq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vabdq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vmax_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vmax_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vmax_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vmax_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vmax_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vmax_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vmax_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vmax_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vmaxq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vmaxq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vmaxq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vmaxq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vmaxq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vmaxq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vmaxq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vmaxq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vmin_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vmin_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vmin_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vmin_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vmin_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vmin_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vmin_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vmin_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vminq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vminq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vminq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vminq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vminq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vminq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vminq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vminq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vmaxnm_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vmaxnm_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vmaxnmq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vmaxnmq_f64(x: f64x2, y: 
f64x2) -> f64x2; +- fn aarch64_vminnm_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vminnm_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vminnmq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vminnmq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn aarch64_vshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn aarch64_vshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn aarch64_vshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn aarch64_vshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn aarch64_vshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn aarch64_vshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn aarch64_vshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn aarch64_vqshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vqshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn aarch64_vqshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vqshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn aarch64_vqshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vqshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn aarch64_vqshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vqshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn aarch64_vqshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vqshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn aarch64_vqshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vqshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn aarch64_vqshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vqshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn aarch64_vqshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vqshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn aarch64_vrshl_s8(x: i8x8, y: i8x8) -> i8x8; +- 
fn aarch64_vrshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn aarch64_vrshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vrshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn aarch64_vrshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vrshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn aarch64_vrshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vrshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn aarch64_vrshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vrshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn aarch64_vrshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vrshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn aarch64_vrshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vrshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn aarch64_vrshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vrshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn aarch64_vqrshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vqrshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn aarch64_vqrshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vqrshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn aarch64_vqrshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vqrshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn aarch64_vqrshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vqrshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn aarch64_vqrshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vqrshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn aarch64_vqrshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vqrshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn aarch64_vqrshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vqrshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn aarch64_vqrshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vqrshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn aarch64_vqshrun_n_s16(x: i16x8, y: u32) -> i8x8; +- fn aarch64_vqshrun_n_s32(x: i32x4, y: u32) -> i16x4; +- fn aarch64_vqshrun_n_s64(x: i64x2, y: u32) -> i32x2; +- fn aarch64_vqrshrun_n_s16(x: i16x8, y: u32) -> i8x8; +- fn aarch64_vqrshrun_n_s32(x: i32x4, y: u32) -> i16x4; +- fn aarch64_vqrshrun_n_s64(x: 
i64x2, y: u32) -> i32x2; +- fn aarch64_vqshrn_n_s16(x: i16x8, y: u32) -> i8x8; +- fn aarch64_vqshrn_n_u16(x: u16x8, y: u32) -> u8x8; +- fn aarch64_vqshrn_n_s32(x: i32x4, y: u32) -> i16x4; +- fn aarch64_vqshrn_n_u32(x: u32x4, y: u32) -> u16x4; +- fn aarch64_vqshrn_n_s64(x: i64x2, y: u32) -> i32x2; +- fn aarch64_vqshrn_n_u64(x: u64x2, y: u32) -> u32x2; +- fn aarch64_vrshrn_n_s16(x: i16x8, y: u32) -> i8x8; +- fn aarch64_vrshrn_n_u16(x: u16x8, y: u32) -> u8x8; +- fn aarch64_vrshrn_n_s32(x: i32x4, y: u32) -> i16x4; +- fn aarch64_vrshrn_n_u32(x: u32x4, y: u32) -> u16x4; +- fn aarch64_vrshrn_n_s64(x: i64x2, y: u32) -> i32x2; +- fn aarch64_vrshrn_n_u64(x: u64x2, y: u32) -> u32x2; +- fn aarch64_vqrshrn_n_s16(x: i16x8, y: u32) -> i8x8; +- fn aarch64_vqrshrn_n_u16(x: u16x8, y: u32) -> u8x8; +- fn aarch64_vqrshrn_n_s32(x: i32x4, y: u32) -> i16x4; +- fn aarch64_vqrshrn_n_u32(x: u32x4, y: u32) -> u16x4; +- fn aarch64_vqrshrn_n_s64(x: i64x2, y: u32) -> i32x2; +- fn aarch64_vqrshrn_n_u64(x: u64x2, y: u32) -> u32x2; +- fn aarch64_vsri_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vsri_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vsri_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vsri_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vsri_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vsri_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vsri_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vsri_u64(x: u64x1, y: u64x1) -> u64x1; +- fn aarch64_vsriq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vsriq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vsriq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vsriq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vsriq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vsriq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vsriq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vsriq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vsli_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vsli_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vsli_s16(x: i16x4, y: 
i16x4) -> i16x4; +- fn aarch64_vsli_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vsli_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vsli_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vsli_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vsli_u64(x: u64x1, y: u64x1) -> u64x1; +- fn aarch64_vsliq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vsliq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vsliq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vsliq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vsliq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vsliq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vsliq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vsliq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vvqmovn_s16(x: i16x8) -> i8x8; +- fn aarch64_vvqmovn_u16(x: u16x8) -> u8x8; +- fn aarch64_vvqmovn_s32(x: i32x4) -> i16x4; +- fn aarch64_vvqmovn_u32(x: u32x4) -> u16x4; +- fn aarch64_vvqmovn_s64(x: i64x2) -> i32x2; +- fn aarch64_vvqmovn_u64(x: u64x2) -> u32x2; +- fn aarch64_vabs_s8(x: i8x8) -> i8x8; +- fn aarch64_vabs_s16(x: i16x4) -> i16x4; +- fn aarch64_vabs_s32(x: i32x2) -> i32x2; +- fn aarch64_vabs_s64(x: i64x1) -> i64x1; +- fn aarch64_vabsq_s8(x: i8x16) -> i8x16; +- fn aarch64_vabsq_s16(x: i16x8) -> i16x8; +- fn aarch64_vabsq_s32(x: i32x4) -> i32x4; +- fn aarch64_vabsq_s64(x: i64x2) -> i64x2; +- fn aarch64_vabs_f32(x: f32x2) -> f32x2; +- fn aarch64_vabs_f64(x: f64x1) -> f64x1; +- fn aarch64_vabsq_f32(x: f32x4) -> f32x4; +- fn aarch64_vabsq_f64(x: f64x2) -> f64x2; +- fn aarch64_vqabs_s8(x: i8x8) -> i8x8; +- fn aarch64_vqabs_s16(x: i16x4) -> i16x4; +- fn aarch64_vqabs_s32(x: i32x2) -> i32x2; +- fn aarch64_vqabs_s64(x: i64x1) -> i64x1; +- fn aarch64_vqabsq_s8(x: i8x16) -> i8x16; +- fn aarch64_vqabsq_s16(x: i16x8) -> i16x8; +- fn aarch64_vqabsq_s32(x: i32x4) -> i32x4; +- fn aarch64_vqabsq_s64(x: i64x2) -> i64x2; +- fn aarch64_vqneg_s8(x: i8x8) -> i8x8; +- fn aarch64_vqneg_s16(x: i16x4) -> i16x4; +- fn aarch64_vqneg_s32(x: i32x2) -> i32x2; +- fn 
aarch64_vqneg_s64(x: i64x1) -> i64x1; +- fn aarch64_vqnegq_s8(x: i8x16) -> i8x16; +- fn aarch64_vqnegq_s16(x: i16x8) -> i16x8; +- fn aarch64_vqnegq_s32(x: i32x4) -> i32x4; +- fn aarch64_vqnegq_s64(x: i64x2) -> i64x2; +- fn aarch64_vclz_s8(x: i8x8) -> i8x8; +- fn aarch64_vclz_u8(x: u8x8) -> u8x8; +- fn aarch64_vclz_s16(x: i16x4) -> i16x4; +- fn aarch64_vclz_u16(x: u16x4) -> u16x4; +- fn aarch64_vclz_s32(x: i32x2) -> i32x2; +- fn aarch64_vclz_u32(x: u32x2) -> u32x2; +- fn aarch64_vclzq_s8(x: i8x16) -> i8x16; +- fn aarch64_vclzq_u8(x: u8x16) -> u8x16; +- fn aarch64_vclzq_s16(x: i16x8) -> i16x8; +- fn aarch64_vclzq_u16(x: u16x8) -> u16x8; +- fn aarch64_vclzq_s32(x: i32x4) -> i32x4; +- fn aarch64_vclzq_u32(x: u32x4) -> u32x4; +- fn aarch64_vcls_s8(x: i8x8) -> i8x8; +- fn aarch64_vcls_u8(x: u8x8) -> u8x8; +- fn aarch64_vcls_s16(x: i16x4) -> i16x4; +- fn aarch64_vcls_u16(x: u16x4) -> u16x4; +- fn aarch64_vcls_s32(x: i32x2) -> i32x2; +- fn aarch64_vcls_u32(x: u32x2) -> u32x2; +- fn aarch64_vclsq_s8(x: i8x16) -> i8x16; +- fn aarch64_vclsq_u8(x: u8x16) -> u8x16; +- fn aarch64_vclsq_s16(x: i16x8) -> i16x8; +- fn aarch64_vclsq_u16(x: u16x8) -> u16x8; +- fn aarch64_vclsq_s32(x: i32x4) -> i32x4; +- fn aarch64_vclsq_u32(x: u32x4) -> u32x4; +- fn aarch64_vcnt_s8(x: i8x8) -> i8x8; +- fn aarch64_vcnt_u8(x: u8x8) -> u8x8; +- fn aarch64_vcntq_s8(x: i8x16) -> i8x16; +- fn aarch64_vcntq_u8(x: u8x16) -> u8x16; +- fn aarch64_vrecpe_u32(x: u32x2) -> u32x2; +- fn aarch64_vrecpe_f32(x: f32x2) -> f32x2; +- fn aarch64_vrecpe_f64(x: f64x1) -> f64x1; +- fn aarch64_vrecpeq_u32(x: u32x4) -> u32x4; +- fn aarch64_vrecpeq_f32(x: f32x4) -> f32x4; +- fn aarch64_vrecpeq_f64(x: f64x2) -> f64x2; +- fn aarch64_vrecps_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vrecps_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vrecpsq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vrecpsq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vsqrt_f32(x: f32x2) -> f32x2; +- fn aarch64_vsqrt_f64(x: f64x1) -> f64x1; +- fn 
aarch64_vsqrtq_f32(x: f32x4) -> f32x4; +- fn aarch64_vsqrtq_f64(x: f64x2) -> f64x2; +- fn aarch64_vrsqrte_u32(x: u32x2) -> u32x2; +- fn aarch64_vrsqrte_f32(x: f32x2) -> f32x2; +- fn aarch64_vrsqrte_f64(x: f64x1) -> f64x1; +- fn aarch64_vrsqrteq_u32(x: u32x4) -> u32x4; +- fn aarch64_vrsqrteq_f32(x: f32x4) -> f32x4; +- fn aarch64_vrsqrteq_f64(x: f64x2) -> f64x2; +- fn aarch64_vrsqrts_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vrsqrts_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vrsqrtsq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vrsqrtsq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vrbit_s8(x: i8x8) -> i8x8; +- fn aarch64_vrbit_u8(x: u8x8) -> u8x8; +- fn aarch64_vrbitq_s8(x: i8x16) -> i8x16; +- fn aarch64_vrbitq_u8(x: u8x16) -> u8x16; +- fn aarch64_vpadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vpadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vpadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vpadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vpadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vpadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vpadd_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vpaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vpaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vpaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vpaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vpaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vpaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vpaddq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vpaddq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vpaddq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vpaddq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vpaddl_s16(x: i8x8) -> i16x4; +- fn aarch64_vpaddl_u16(x: u8x8) -> u16x4; +- fn aarch64_vpaddl_s32(x: i16x4) -> i32x2; +- fn aarch64_vpaddl_u32(x: u16x4) -> u32x2; +- fn aarch64_vpaddl_s64(x: i32x2) -> i64x1; +- fn aarch64_vpaddl_u64(x: u32x2) -> u64x1; +- fn aarch64_vpaddlq_s16(x: i8x16) -> i16x8; +- fn 
aarch64_vpaddlq_u16(x: u8x16) -> u16x8; +- fn aarch64_vpaddlq_s32(x: i16x8) -> i32x4; +- fn aarch64_vpaddlq_u32(x: u16x8) -> u32x4; +- fn aarch64_vpaddlq_s64(x: i32x4) -> i64x2; +- fn aarch64_vpaddlq_u64(x: u32x4) -> u64x2; +- fn aarch64_vpmax_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vpmax_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vpmax_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vpmax_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vpmax_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vpmax_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vpmax_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vpmaxq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vpmaxq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vpmaxq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vpmaxq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vpmaxq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vpmaxq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vpmaxq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vpmaxq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vpmaxq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vpmaxq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vpmin_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vpmin_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vpmin_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vpmin_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vpmin_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vpmin_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vpmin_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vpminq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vpminq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vpminq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vpminq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vpminq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vpminq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vpminq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vpminq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vpminq_u64(x: u64x2, y: u64x2) -> 
u64x2; +- fn aarch64_vpminq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vpmaxnm_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vpmaxnm_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vpmaxnm_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vpmaxnm_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vpmaxnm_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vpmaxnm_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vpmaxnm_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vpmaxnmq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vpmaxnmq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vpmaxnmq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vpmaxnmq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vpmaxnmq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vpmaxnmq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vpmaxnmq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vpmaxnmq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vpmaxnmq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vpmaxnmq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vpminnm_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vpminnmq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vpminnmq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vaddv_s8(x: i8x8) -> i8; +- fn aarch64_vaddv_u8(x: u8x8) -> u8; +- fn aarch64_vaddv_s16(x: i16x4) -> i16; +- fn aarch64_vaddv_u16(x: u16x4) -> u16; +- fn aarch64_vaddv_s32(x: i32x2) -> i32; +- fn aarch64_vaddv_u32(x: u32x2) -> u32; +- fn aarch64_vaddv_f32(x: f32x2) -> f32; +- fn aarch64_vaddvq_s8(x: i8x16) -> i8; +- fn aarch64_vaddvq_u8(x: u8x16) -> u8; +- fn aarch64_vaddvq_s16(x: i16x8) -> i16; +- fn aarch64_vaddvq_u16(x: u16x8) -> u16; +- fn aarch64_vaddvq_s32(x: i32x4) -> i32; +- fn aarch64_vaddvq_u32(x: u32x4) -> u32; +- fn aarch64_vaddvq_f32(x: f32x4) -> f32; +- fn aarch64_vaddvq_s64(x: i64x2) -> i64; +- fn aarch64_vaddvq_u64(x: u64x2) -> u64; +- fn aarch64_vaddvq_f64(x: f64x2) -> f64; +- fn aarch64_vaddlv_s8(x: i8x8) -> i16; +- fn aarch64_vaddlv_u8(x: u8x8) -> u16; +- fn 
aarch64_vaddlv_s16(x: i16x4) -> i32; +- fn aarch64_vaddlv_u16(x: u16x4) -> u32; +- fn aarch64_vaddlv_s32(x: i32x2) -> i64; +- fn aarch64_vaddlv_u32(x: u32x2) -> u64; +- fn aarch64_vaddlvq_s8(x: i8x16) -> i16; +- fn aarch64_vaddlvq_u8(x: u8x16) -> u16; +- fn aarch64_vaddlvq_s16(x: i16x8) -> i32; +- fn aarch64_vaddlvq_u16(x: u16x8) -> u32; +- fn aarch64_vaddlvq_s32(x: i32x4) -> i64; +- fn aarch64_vaddlvq_u32(x: u32x4) -> u64; +- fn aarch64_vmaxv_s8(x: i8x8) -> i8; +- fn aarch64_vmaxv_u8(x: u8x8) -> u8; +- fn aarch64_vmaxv_s16(x: i16x4) -> i16; +- fn aarch64_vmaxv_u16(x: u16x4) -> u16; +- fn aarch64_vmaxv_s32(x: i32x2) -> i32; +- fn aarch64_vmaxv_u32(x: u32x2) -> u32; +- fn aarch64_vmaxv_f32(x: f32x2) -> f32; +- fn aarch64_vmaxvq_s8(x: i8x16) -> i8; +- fn aarch64_vmaxvq_u8(x: u8x16) -> u8; +- fn aarch64_vmaxvq_s16(x: i16x8) -> i16; +- fn aarch64_vmaxvq_u16(x: u16x8) -> u16; +- fn aarch64_vmaxvq_s32(x: i32x4) -> i32; +- fn aarch64_vmaxvq_u32(x: u32x4) -> u32; +- fn aarch64_vmaxvq_f32(x: f32x4) -> f32; +- fn aarch64_vmaxvq_f64(x: f64x2) -> f64; +- fn aarch64_vminv_s8(x: i8x8) -> i8; +- fn aarch64_vminv_u8(x: u8x8) -> u8; +- fn aarch64_vminv_s16(x: i16x4) -> i16; +- fn aarch64_vminv_u16(x: u16x4) -> u16; +- fn aarch64_vminv_s32(x: i32x2) -> i32; +- fn aarch64_vminv_u32(x: u32x2) -> u32; +- fn aarch64_vminv_f32(x: f32x2) -> f32; +- fn aarch64_vminvq_s8(x: i8x16) -> i8; +- fn aarch64_vminvq_u8(x: u8x16) -> u8; +- fn aarch64_vminvq_s16(x: i16x8) -> i16; +- fn aarch64_vminvq_u16(x: u16x8) -> u16; +- fn aarch64_vminvq_s32(x: i32x4) -> i32; +- fn aarch64_vminvq_u32(x: u32x4) -> u32; +- fn aarch64_vminvq_f32(x: f32x4) -> f32; +- fn aarch64_vminvq_f64(x: f64x2) -> f64; +- fn aarch64_vmaxnmv_f32(x: f32x2) -> f32; +- fn aarch64_vmaxnmvq_f32(x: f32x4) -> f32; +- fn aarch64_vmaxnmvq_f64(x: f64x2) -> f64; +- fn aarch64_vminnmv_f32(x: f32x2) -> f32; +- fn aarch64_vminnmvq_f32(x: f32x4) -> f32; +- fn aarch64_vminnmvq_f64(x: f64x2) -> f64; +- fn aarch64_vqtbl1_s8(x: i8x16, y: u8x8) -> 
i8x8; +- fn aarch64_vqtbl1_u8(x: u8x16, y: u8x8) -> u8x8; +- fn aarch64_vqtbl1q_s8(x: i8x16, y: u8x16) -> i8x16; +- fn aarch64_vqtbl1q_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vqtbx1_s8(x: i8x8, y: i8x16, z: u8x8) -> i8x8; +- fn aarch64_vqtbx1_u8(x: u8x8, y: u8x16, z: u8x8) -> u8x8; +- fn aarch64_vqtbx1q_s8(x: i8x16, y: i8x16, z: u8x16) -> i8x16; +- fn aarch64_vqtbx1q_u8(x: u8x16, y: u8x16, z: u8x16) -> u8x16; +- fn aarch64_vqtbl2_s8(x: (i8x16, i8x16), y: u8x8) -> i8x8; +- fn aarch64_vqtbl2_u8(x: (u8x16, u8x16), y: u8x8) -> u8x8; +- fn aarch64_vqtbl2q_s8(x: (i8x16, i8x16), y: u8x16) -> i8x16; +- fn aarch64_vqtbl2q_u8(x: (u8x16, u8x16), y: u8x16) -> u8x16; +- fn aarch64_vqtbx2_s8(x: (i8x16, i8x16), y: u8x8) -> i8x8; +- fn aarch64_vqtbx2_u8(x: (u8x16, u8x16), y: u8x8) -> u8x8; +- fn aarch64_vqtbx2q_s8(x: (i8x16, i8x16), y: u8x16) -> i8x16; +- fn aarch64_vqtbx2q_u8(x: (u8x16, u8x16), y: u8x16) -> u8x16; +- fn aarch64_vqtbl3_s8(x: (i8x16, i8x16, i8x16), y: u8x8) -> i8x8; +- fn aarch64_vqtbl3_u8(x: (u8x16, u8x16, u8x16), y: u8x8) -> u8x8; +- fn aarch64_vqtbl3q_s8(x: (i8x16, i8x16, i8x16), y: u8x16) -> i8x16; +- fn aarch64_vqtbl3q_u8(x: (u8x16, u8x16, u8x16), y: u8x16) -> u8x16; +- fn aarch64_vqtbx3_s8(x: i8x8, y: (i8x16, i8x16, i8x16), z: u8x8) -> i8x8; +- fn aarch64_vqtbx3_u8(x: u8x8, y: (u8x16, u8x16, u8x16), z: u8x8) -> u8x8; +- fn aarch64_vqtbx3q_s8(x: i8x16, y: (i8x16, i8x16, i8x16), z: u8x16) -> i8x16; +- fn aarch64_vqtbx3q_u8(x: u8x16, y: (u8x16, u8x16, u8x16), z: u8x16) -> u8x16; +- fn aarch64_vqtbl4_s8(x: (i8x16, i8x16, i8x16, i8x16), y: u8x8) -> i8x8; +- fn aarch64_vqtbl4_u8(x: (u8x16, u8x16, u8x16, u8x16), y: u8x8) -> u8x8; +- fn aarch64_vqtbl4q_s8(x: (i8x16, i8x16, i8x16, i8x16), y: u8x16) -> i8x16; +- fn aarch64_vqtbl4q_u8(x: (u8x16, u8x16, u8x16, u8x16), y: u8x16) -> u8x16; +- fn aarch64_vqtbx4_s8(x: i8x8, y: (i8x16, i8x16, i8x16, i8x16), z: u8x8) -> i8x8; +- fn aarch64_vqtbx4_u8(x: u8x8, y: (u8x16, u8x16, u8x16, u8x16), z: u8x8) -> u8x8; +- fn 
aarch64_vqtbx4q_s8(x: i8x16, y: (i8x16, i8x16, i8x16, i8x16), z: u8x16) -> i8x16; +- fn aarch64_vqtbx4q_u8(x: u8x16, y: (u8x16, u8x16, u8x16, u8x16), z: u8x16) -> u8x16; +-} +- +-pub trait Aarch64F32x4 { +- fn to_f64(self) -> f64x2; +-} +-impl Aarch64F32x4 for f32x4 { +- #[inline] +- fn to_f64(self) -> f64x2 { +- unsafe { +- simd_cast(f32x2(self.0, self.1)) +- } +- } +-} +- +-pub trait Aarch64U8x16 { +- fn table_lookup_1(self, t0: u8x16) -> u8x16; +-} +-impl Aarch64U8x16 for u8x16 { +- #[inline] +- fn table_lookup_1(self, t0: u8x16) -> u8x16 { +- unsafe {aarch64_vqtbl1q_u8(t0, self)} +- } +-} +-pub trait Aarch64I8x16 { +- fn table_lookup_1(self, t0: i8x16) -> i8x16; +-} +-impl Aarch64I8x16 for i8x16 { +- #[inline] +- fn table_lookup_1(self, t0: i8x16) -> i8x16 { +- unsafe {aarch64_vqtbl2q_s8((t0, t0), ::bitcast(self))} +- } +-} +- +-#[doc(hidden)] +-pub mod common { +- use super::super::super::*; +- use core::mem; +- +- #[inline] +- pub fn f32x4_sqrt(x: f32x4) -> f32x4 { +- unsafe {super::aarch64_vsqrtq_f32(x)} +- } +- #[inline] +- pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 { +- unsafe {super::aarch64_vrsqrteq_f32(x)} +- } +- #[inline] +- pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 { +- unsafe {super::aarch64_vrecpeq_f32(x)} +- } +- #[inline] +- pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 { +- unsafe {super::aarch64_vmaxq_f32(x, y)} +- } +- #[inline] +- pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 { +- unsafe {super::aarch64_vminq_f32(x, y)} +- } +- +- macro_rules! bools { +- ($($ty: ty, $all: ident ($min: ident), $any: ident ($max: ident);)*) => { +- $( +- #[inline] +- pub fn $all(x: $ty) -> bool { +- unsafe { +- super::$min(mem::transmute(x)) != 0 +- } +- } +- #[inline] +- pub fn $any(x: $ty) -> bool { +- unsafe { +- super::$max(mem::transmute(x)) != 0 +- } +- } +- )* +- } +- } +- +- bools! 
{ +- bool32fx4, bool32fx4_all(aarch64_vminvq_u32), bool32fx4_any(aarch64_vmaxvq_u32); +- bool8ix16, bool8ix16_all(aarch64_vminvq_u8), bool8ix16_any(aarch64_vmaxvq_u8); +- bool16ix8, bool16ix8_all(aarch64_vminvq_u16), bool16ix8_any(aarch64_vmaxvq_u16); +- bool32ix4, bool32ix4_all(aarch64_vminvq_u32), bool32ix4_any(aarch64_vmaxvq_u32); +- } +-} +diff --git a/third_party/rust/simd/src/arm/mod.rs b/third_party/rust/simd/src/arm/mod.rs +deleted file mode 100644 +index 0d451103840b..000000000000 +--- a/third_party/rust/simd/src/arm/mod.rs ++++ /dev/null +@@ -1,4 +0,0 @@ +-//! Features specific to ARM CPUs. +- +-#[cfg(any(feature = "doc", target_feature = "neon"))] +-pub mod neon; +diff --git a/third_party/rust/simd/src/arm/neon.rs b/third_party/rust/simd/src/arm/neon.rs +deleted file mode 100644 +index 8c90a72bb0dc..000000000000 +--- a/third_party/rust/simd/src/arm/neon.rs ++++ /dev/null +@@ -1,622 +0,0 @@ +-use super::super::*; +-use sixty_four::{i64x2, u64x2}; +- +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct u32x2(u32, u32); +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct i32x2(i32, i32); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool32ix2(i32, i32); +- +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct f32x2(f32, f32); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool32fx2(i32, i32); +- +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct u16x4(u16, u16, u16, u16); +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct i16x4(i16, i16, i16, i16); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool16ix4(i16, i16, i16, i16); +- +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct u8x8(u8, u8, u8, u8, +- u8, u8, u8, u8); +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub 
struct i8x8(i8, i8, i8, i8, +- i8, i8, i8, i8); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool8ix8(i8, i8, i8, i8, +- i8, i8, i8, i8); +- +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct i64x1(i64); +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct u64x1(u64); +- +-macro_rules! half_bools { +- ($($ty: ty, $as_u: ty, $elem: ty, $all: ident ($min: ident), $any: ident ($max: ident);)*) => { +- $( +- impl $ty { +- #[inline] +- pub fn $all(self) -> bool { +- unsafe { +- let t: $as_u = bitcast(self); +- let y = $min(t, mem::uninitialized()); +- let y32: u32x2 = bitcast(y); +- y32.0 == 0xFFFFFFFF +- } +- } +- #[inline] +- pub fn $any(self) -> bool { +- unsafe { +- let t: $as_u = bitcast(self); +- let y = $max(t, mem::uninitialized()); +- let y32: u32x2 = bitcast(y); +- y32.0 != 0 +- } +- } +- } +- +- impl Clone for $ty { +- #[inline] fn clone(&self) -> Self { +- *self +- } +- } +- +- unsafe impl Simd for $ty { +- type Bool = $ty; +- type Elem = $elem; +- } +- +- )* +- } +-} +- +-half_bools! { +- bool32fx2, u32x2, i32, bool32fx2_all(arm_vpmin_u32), bool32fx2_any(arm_vpmax_u32); +- bool8ix8, u8x8, i8, bool8ix8_all(arm_vpmin_u8), bool8ix8_any(arm_vpmax_u8); +- bool16ix4, u16x4, i16, bool16ix4_all(arm_vpmin_u16), bool16ix4_any(arm_vpmax_u16); +- bool32ix2, u32x2, f32, bool32ix2_all(arm_vpmin_u32), bool32ix2_any(arm_vpmax_u32); +-} +- +-macro_rules! half_simd { +- ($($ty: ty, $elem: ty, $bool_ty: ty;)*) => { +- $( +- unsafe impl Simd for $ty { +- type Bool = $bool_ty; +- type Elem = $elem; +- } +- )* +- } +-} +- +-half_simd! 
{ +- f32x2, f32, bool32fx2; +- u32x2, u32, bool32ix2; +- i32x2, i32, bool32ix2; +- u16x4, u16, bool16ix4; +- i16x4, i16, bool16ix4; +- u8x8, u8, bool8ix8; +- i8x8, i8, bool8ix8; +-} +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn arm_vhadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vhadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vhadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vhadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vhadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vhadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vhaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vhaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vhaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vhaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vhaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vhaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vrhadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vrhadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vrhadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vrhadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vrhadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vrhadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vrhaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vrhaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vrhaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vrhaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vrhaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vrhaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vqadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vqadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vqadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vqadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vqadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vqadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vqadd_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vqadd_u64(x: u64x1, y: u64x1) -> u64x1; +- fn arm_vqaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vqaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vqaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn 
arm_vqaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vqaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vqaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vqaddq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vqaddq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn arm_vraddhn_s16(x: i16x8, y: i16x8) -> i8x8; +- fn arm_vraddhn_u16(x: u16x8, y: u16x8) -> u8x8; +- fn arm_vraddhn_s32(x: i32x4, y: i32x4) -> i16x4; +- fn arm_vraddhn_u32(x: u32x4, y: u32x4) -> u16x4; +- fn arm_vraddhn_s64(x: i64x2, y: i64x2) -> i32x2; +- fn arm_vraddhn_u64(x: u64x2, y: u64x2) -> u32x2; +- fn arm_vfma_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vfmaq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn arm_vqdmulh_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vqdmulh_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vqdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vqdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vqrdmulh_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vqrdmulh_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vqrdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vqrdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vmull_s8(x: i8x8, y: i8x8) -> i16x8; +- fn arm_vmull_u8(x: u8x8, y: u8x8) -> u16x8; +- fn arm_vmull_s16(x: i16x4, y: i16x4) -> i32x4; +- fn arm_vmull_u16(x: u16x4, y: u16x4) -> u32x4; +- fn arm_vmull_s32(x: i32x2, y: i32x2) -> i64x2; +- fn arm_vmull_u32(x: u32x2, y: u32x2) -> u64x2; +- fn arm_vqdmullq_s8(x: i8x8, y: i8x8) -> i16x8; +- fn arm_vqdmullq_s16(x: i16x4, y: i16x4) -> i32x4; +- fn arm_vhsub_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vhsub_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vhsub_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vhsub_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vhsub_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vhsub_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vhsubq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vhsubq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vhsubq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vhsubq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vhsubq_s32(x: 
i32x4, y: i32x4) -> i32x4; +- fn arm_vhsubq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vqsub_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vqsub_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vqsub_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vqsub_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vqsub_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vqsub_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vqsub_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vqsub_u64(x: u64x1, y: u64x1) -> u64x1; +- fn arm_vqsubq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vqsubq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vqsubq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vqsubq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vqsubq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vqsubq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vqsubq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vqsubq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn arm_vrsubhn_s16(x: i16x8, y: i16x8) -> i8x8; +- fn arm_vrsubhn_u16(x: u16x8, y: u16x8) -> u8x8; +- fn arm_vrsubhn_s32(x: i32x4, y: i32x4) -> i16x4; +- fn arm_vrsubhn_u32(x: u32x4, y: u32x4) -> u16x4; +- fn arm_vrsubhn_s64(x: i64x2, y: i64x2) -> i32x2; +- fn arm_vrsubhn_u64(x: u64x2, y: u64x2) -> u32x2; +- fn arm_vabd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vabd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vabd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vabd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vabd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vabd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vabd_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vabdq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vabdq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vabdq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vabdq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vabdq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vabdq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vabdq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn arm_vmax_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vmax_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vmax_s16(x: i16x4, y: i16x4) 
-> i16x4; +- fn arm_vmax_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vmax_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vmax_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vmax_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vmaxq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vmaxq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vmaxq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vmaxq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vmaxq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vmaxq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vmaxq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn arm_vmin_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vmin_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vmin_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vmin_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vmin_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vmin_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vmin_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vminq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vminq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vminq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vminq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vminq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vminq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vminq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn arm_vshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn arm_vshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn arm_vshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn arm_vshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn arm_vshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn arm_vshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn arm_vshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn arm_vshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vshlq_u64(x: u64x2, 
y: i64x2) -> u64x2; +- fn arm_vqshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vqshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn arm_vqshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vqshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn arm_vqshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vqshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn arm_vqshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vqshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn arm_vqshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vqshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn arm_vqshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vqshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn arm_vqshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vqshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn arm_vqshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vqshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn arm_vrshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vrshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn arm_vrshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vrshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn arm_vrshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vrshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn arm_vrshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vrshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn arm_vrshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vrshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn arm_vrshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vrshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn arm_vrshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vrshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn arm_vrshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vrshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn arm_vqrshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vqrshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn arm_vqrshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vqrshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn arm_vqrshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vqrshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn arm_vqrshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vqrshl_u64(x: u64x1, y: 
i64x1) -> u64x1; +- fn arm_vqrshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vqrshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn arm_vqrshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vqrshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn arm_vqrshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vqrshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn arm_vqrshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vqrshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn arm_vqshrun_n_s16(x: i16x8, y: u32) -> i8x8; +- fn arm_vqshrun_n_s32(x: i32x4, y: u32) -> i16x4; +- fn arm_vqshrun_n_s64(x: i64x2, y: u32) -> i32x2; +- fn arm_vqrshrun_n_s16(x: i16x8, y: u32) -> i8x8; +- fn arm_vqrshrun_n_s32(x: i32x4, y: u32) -> i16x4; +- fn arm_vqrshrun_n_s64(x: i64x2, y: u32) -> i32x2; +- fn arm_vqshrn_n_s16(x: i16x8, y: u32) -> i8x8; +- fn arm_vqshrn_n_u16(x: u16x8, y: u32) -> u8x8; +- fn arm_vqshrn_n_s32(x: i32x4, y: u32) -> i16x4; +- fn arm_vqshrn_n_u32(x: u32x4, y: u32) -> u16x4; +- fn arm_vqshrn_n_s64(x: i64x2, y: u32) -> i32x2; +- fn arm_vqshrn_n_u64(x: u64x2, y: u32) -> u32x2; +- fn arm_vrshrn_n_s16(x: i16x8, y: u32) -> i8x8; +- fn arm_vrshrn_n_u16(x: u16x8, y: u32) -> u8x8; +- fn arm_vrshrn_n_s32(x: i32x4, y: u32) -> i16x4; +- fn arm_vrshrn_n_u32(x: u32x4, y: u32) -> u16x4; +- fn arm_vrshrn_n_s64(x: i64x2, y: u32) -> i32x2; +- fn arm_vrshrn_n_u64(x: u64x2, y: u32) -> u32x2; +- fn arm_vqrshrn_n_s16(x: i16x8, y: u32) -> i8x8; +- fn arm_vqrshrn_n_u16(x: u16x8, y: u32) -> u8x8; +- fn arm_vqrshrn_n_s32(x: i32x4, y: u32) -> i16x4; +- fn arm_vqrshrn_n_u32(x: u32x4, y: u32) -> u16x4; +- fn arm_vqrshrn_n_s64(x: i64x2, y: u32) -> i32x2; +- fn arm_vqrshrn_n_u64(x: u64x2, y: u32) -> u32x2; +- fn arm_vsri_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vsri_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vsri_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vsri_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vsri_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vsri_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vsri_s64(x: i64x1, y: i64x1) -> i64x1; +- 
fn arm_vsri_u64(x: u64x1, y: u64x1) -> u64x1; +- fn arm_vsriq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vsriq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vsriq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vsriq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vsriq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vsriq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vsriq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vsriq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn arm_vsli_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vsli_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vsli_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vsli_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vsli_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vsli_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vsli_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vsli_u64(x: u64x1, y: u64x1) -> u64x1; +- fn arm_vsliq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vsliq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vsliq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vsliq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vsliq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vsliq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vsliq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vsliq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn arm_vvqmovn_s16(x: i16x8) -> i8x8; +- fn arm_vvqmovn_u16(x: u16x8) -> u8x8; +- fn arm_vvqmovn_s32(x: i32x4) -> i16x4; +- fn arm_vvqmovn_u32(x: u32x4) -> u16x4; +- fn arm_vvqmovn_s64(x: i64x2) -> i32x2; +- fn arm_vvqmovn_u64(x: u64x2) -> u32x2; +- fn arm_vabs_s8(x: i8x8) -> i8x8; +- fn arm_vabs_s16(x: i16x4) -> i16x4; +- fn arm_vabs_s32(x: i32x2) -> i32x2; +- fn arm_vabsq_s8(x: i8x16) -> i8x16; +- fn arm_vabsq_s16(x: i16x8) -> i16x8; +- fn arm_vabsq_s32(x: i32x4) -> i32x4; +- fn arm_vabs_f32(x: f32x2) -> f32x2; +- fn arm_vabsq_f32(x: f32x4) -> f32x4; +- fn arm_vqabs_s8(x: i8x8) -> i8x8; +- fn arm_vqabs_s16(x: i16x4) -> i16x4; +- fn arm_vqabs_s32(x: i32x2) -> i32x2; +- fn arm_vqabsq_s8(x: i8x16) -> i8x16; +- fn arm_vqabsq_s16(x: i16x8) -> i16x8; +- fn 
arm_vqabsq_s32(x: i32x4) -> i32x4; +- fn arm_vqneg_s8(x: i8x8) -> i8x8; +- fn arm_vqneg_s16(x: i16x4) -> i16x4; +- fn arm_vqneg_s32(x: i32x2) -> i32x2; +- fn arm_vqnegq_s8(x: i8x16) -> i8x16; +- fn arm_vqnegq_s16(x: i16x8) -> i16x8; +- fn arm_vqnegq_s32(x: i32x4) -> i32x4; +- fn arm_vclz_s8(x: i8x8) -> i8x8; +- fn arm_vclz_u8(x: u8x8) -> u8x8; +- fn arm_vclz_s16(x: i16x4) -> i16x4; +- fn arm_vclz_u16(x: u16x4) -> u16x4; +- fn arm_vclz_s32(x: i32x2) -> i32x2; +- fn arm_vclz_u32(x: u32x2) -> u32x2; +- fn arm_vclzq_s8(x: i8x16) -> i8x16; +- fn arm_vclzq_u8(x: u8x16) -> u8x16; +- fn arm_vclzq_s16(x: i16x8) -> i16x8; +- fn arm_vclzq_u16(x: u16x8) -> u16x8; +- fn arm_vclzq_s32(x: i32x4) -> i32x4; +- fn arm_vclzq_u32(x: u32x4) -> u32x4; +- fn arm_vcls_s8(x: i8x8) -> i8x8; +- fn arm_vcls_u8(x: u8x8) -> u8x8; +- fn arm_vcls_s16(x: i16x4) -> i16x4; +- fn arm_vcls_u16(x: u16x4) -> u16x4; +- fn arm_vcls_s32(x: i32x2) -> i32x2; +- fn arm_vcls_u32(x: u32x2) -> u32x2; +- fn arm_vclsq_s8(x: i8x16) -> i8x16; +- fn arm_vclsq_u8(x: u8x16) -> u8x16; +- fn arm_vclsq_s16(x: i16x8) -> i16x8; +- fn arm_vclsq_u16(x: u16x8) -> u16x8; +- fn arm_vclsq_s32(x: i32x4) -> i32x4; +- fn arm_vclsq_u32(x: u32x4) -> u32x4; +- fn arm_vcnt_s8(x: i8x8) -> i8x8; +- fn arm_vcnt_u8(x: u8x8) -> u8x8; +- fn arm_vcntq_s8(x: i8x16) -> i8x16; +- fn arm_vcntq_u8(x: u8x16) -> u8x16; +- fn arm_vrecpe_u32(x: u32x2) -> u32x2; +- fn arm_vrecpe_f32(x: f32x2) -> f32x2; +- fn arm_vrecpeq_u32(x: u32x4) -> u32x4; +- fn arm_vrecpeq_f32(x: f32x4) -> f32x4; +- fn arm_vrecps_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vrecpsq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn arm_vsqrt_f32(x: f32x2) -> f32x2; +- fn arm_vsqrtq_f32(x: f32x4) -> f32x4; +- fn arm_vrsqrte_u32(x: u32x2) -> u32x2; +- fn arm_vrsqrte_f32(x: f32x2) -> f32x2; +- fn arm_vrsqrteq_u32(x: u32x4) -> u32x4; +- fn arm_vrsqrteq_f32(x: f32x4) -> f32x4; +- fn arm_vrsqrts_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vrsqrtsq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn 
arm_vbsl_s8(x: u8x8, y: i8x8) -> i8x8; +- fn arm_vbsl_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vbsl_s16(x: u16x4, y: i16x4) -> i16x4; +- fn arm_vbsl_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vbsl_s32(x: u32x2, y: i32x2) -> i32x2; +- fn arm_vbsl_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vbsl_s64(x: u64x1, y: i64x1) -> i64x1; +- fn arm_vbsl_u64(x: u64x1, y: u64x1) -> u64x1; +- fn arm_vbslq_s8(x: u8x16, y: i8x16) -> i8x16; +- fn arm_vbslq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vbslq_s16(x: u16x8, y: i16x8) -> i16x8; +- fn arm_vbslq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vbslq_s32(x: u32x4, y: i32x4) -> i32x4; +- fn arm_vbslq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vbslq_s64(x: u64x2, y: i64x2) -> i64x2; +- fn arm_vbslq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn arm_vpadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vpadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vpadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vpadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vpadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vpadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vpadd_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vpaddl_s16(x: i8x8) -> i16x4; +- fn arm_vpaddl_u16(x: u8x8) -> u16x4; +- fn arm_vpaddl_s32(x: i16x4) -> i32x2; +- fn arm_vpaddl_u32(x: u16x4) -> u32x2; +- fn arm_vpaddl_s64(x: i32x2) -> i64x1; +- fn arm_vpaddl_u64(x: u32x2) -> u64x1; +- fn arm_vpaddlq_s16(x: i8x16) -> i16x8; +- fn arm_vpaddlq_u16(x: u8x16) -> u16x8; +- fn arm_vpaddlq_s32(x: i16x8) -> i32x4; +- fn arm_vpaddlq_u32(x: u16x8) -> u32x4; +- fn arm_vpaddlq_s64(x: i32x4) -> i64x2; +- fn arm_vpaddlq_u64(x: u32x4) -> u64x2; +- fn arm_vpadal_s16(x: i16x4, y: i8x8) -> i16x4; +- fn arm_vpadal_u16(x: u16x4, y: u8x8) -> u16x4; +- fn arm_vpadal_s32(x: i32x2, y: i16x4) -> i32x2; +- fn arm_vpadal_u32(x: u32x2, y: u16x4) -> u32x2; +- fn arm_vpadal_s64(x: i64x1, y: i32x2) -> i64x1; +- fn arm_vpadal_u64(x: u64x1, y: u32x2) -> u64x1; +- fn arm_vpadalq_s16(x: i16x8, y: i8x16) -> i16x8; +- fn arm_vpadalq_u16(x: 
u16x8, y: u8x16) -> u16x8; +- fn arm_vpadalq_s32(x: i32x4, y: i16x8) -> i32x4; +- fn arm_vpadalq_u32(x: u32x4, y: u16x8) -> u32x4; +- fn arm_vpadalq_s64(x: i64x2, y: i32x4) -> i64x2; +- fn arm_vpadalq_u64(x: u64x2, y: u32x4) -> u64x2; +- fn arm_vpmax_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vpmax_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vpmax_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vpmax_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vpmax_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vpmax_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vpmax_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vpmin_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vpmin_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vpmin_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vpmin_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vpmin_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vpmin_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vpmin_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vpminq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vpminq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vpminq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vpminq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vpminq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vpminq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vpminq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn arm_vtbl1_s8(x: i8x8, y: u8x8) -> i8x8; +- fn arm_vtbl1_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vtbx1_s8(x: i8x8, y: i8x8, z: u8x8) -> i8x8; +- fn arm_vtbx1_u8(x: u8x8, y: u8x8, z: u8x8) -> u8x8; +- fn arm_vtbl2_s8(x: (i8x8, i8x8), y: u8x8) -> i8x8; +- fn arm_vtbl2_u8(x: (u8x8, u8x8), y: u8x8) -> u8x8; +- fn arm_vtbx2_s8(x: (i8x8, i8x8), y: u8x8) -> i8x8; +- fn arm_vtbx2_u8(x: (u8x8, u8x8), y: u8x8) -> u8x8; +- fn arm_vtbl3_s8(x: (i8x8, i8x8, i8x8), y: u8x8) -> i8x8; +- fn arm_vtbl3_u8(x: (u8x8, u8x8, u8x8), y: u8x8) -> u8x8; +- fn arm_vtbx3_s8(x: i8x8, y: (i8x8, i8x8, i8x8), z: u8x8) -> i8x8; +- fn arm_vtbx3_u8(x: u8x8, y: (u8x8, u8x8, u8x8), z: u8x8) -> u8x8; +- fn arm_vtbl4_s8(x: (i8x8, i8x8, i8x8, i8x8), 
y: u8x8) -> i8x8; +- fn arm_vtbl4_u8(x: (u8x8, u8x8, u8x8, u8x8), y: u8x8) -> u8x8; +- fn arm_vtbx4_s8(x: i8x8, y: (i8x8, i8x8, i8x8, i8x8), z: u8x8) -> i8x8; +- fn arm_vtbx4_u8(x: u8x8, y: (u8x8, u8x8, u8x8, u8x8), z: u8x8) -> u8x8; +-} +- +- +-impl u8x8 { +- #[inline] +- pub fn table_lookup_1(self, t0: u8x8) -> u8x8 { +- unsafe {arm_vtbl1_u8(t0, self)} +- } +- #[inline] +- pub fn table_lookup_2(self, t0: u8x8, t1: u8x8) -> u8x8 { +- unsafe {arm_vtbl2_u8((t0, t1), self)} +- } +- #[inline] +- pub fn table_lookup_3(self, t0: u8x8, t1: u8x8, t2: u8x8) -> u8x8 { +- unsafe {arm_vtbl3_u8((t0, t1, t2), self)} +- } +- #[inline] +- pub fn table_lookup_4(self, t0: u8x8, t1: u8x8, t2: u8x8, t3: u8x8) -> u8x8 { +- unsafe {arm_vtbl4_u8((t0, t1, t2, t3), self)} +- } +-} +- +-#[doc(hidden)] +-pub mod common { +- use super::super::super::*; +- use super::*; +- use core::mem; +- +- #[inline] +- pub fn f32x4_sqrt(x: f32x4) -> f32x4 { +- unsafe {super::arm_vsqrtq_f32(x)} +- } +- #[inline] +- pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 { +- unsafe {super::arm_vrsqrteq_f32(x)} +- } +- #[inline] +- pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 { +- unsafe {super::arm_vrecpeq_f32(x)} +- } +- #[inline] +- pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 { +- unsafe {super::arm_vmaxq_f32(x, y)} +- } +- #[inline] +- pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 { +- unsafe {super::arm_vminq_f32(x, y)} +- } +- +- macro_rules! 
bools { +- ($($ty: ty, $as_u: ty, $shuffle_fn: ident, $lo_idxs: expr, $hi_idxs: expr, $all: ident ($min: ident), $any: ident ($max: ident);)*) => { +- $( +- #[inline] +- pub fn $all(x: $ty) -> bool { +- unsafe { +- let t: $as_u = bitcast(x); +- let lo = $shuffle_fn(t, t, $lo_idxs); +- let hi = $shuffle_fn(t, t, $hi_idxs); +- let x = super::$min(lo, hi); +- let y = super::$min(x, mem::uninitialized()); +- let y32: u32x2 = bitcast(y); +- y32.0 == 0xFFFFFFFF +- } +- } +- #[inline] +- pub fn $any(x: $ty) -> bool { +- unsafe { +- let t: $as_u = bitcast(x); +- let lo = $shuffle_fn(t, t, $lo_idxs); +- let hi = $shuffle_fn(t, t, $hi_idxs); +- let x = super::$max(lo, hi); +- let y = super::$max(x, mem::uninitialized()); +- let y32: u32x2 = bitcast(y); +- y32.0 != 0 +- } +- } +- )* +- } +- } +- +- bools! { +- bool32fx4, u32x4, simd_shuffle2, [0, 1], [2, 3], bool32fx4_all(arm_vpmin_u32), bool32fx4_any(arm_vpmax_u32); +- bool8ix16, u8x16, simd_shuffle8, [0, 1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14, 15], bool8ix16_all(arm_vpmin_u8), bool8ix16_any(arm_vpmax_u8); +- bool16ix8, u16x8, simd_shuffle4, [0, 1, 2, 3], [4, 5, 6, 7], bool16ix8_all(arm_vpmin_u16), bool16ix8_any(arm_vpmax_u16); +- bool32ix4, u32x4, simd_shuffle2, [0, 1], [2, 3], bool32ix4_all(arm_vpmin_u32), bool32ix4_any(arm_vpmax_u32); +- } +-} +diff --git a/third_party/rust/simd/src/common.rs b/third_party/rust/simd/src/common.rs +deleted file mode 100644 +index 1052ae36959d..000000000000 +--- a/third_party/rust/simd/src/common.rs ++++ /dev/null +@@ -1,520 +0,0 @@ +-use super::*; +-#[allow(unused_imports)] +-use super::{ +- simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge, +- simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16, +- simd_insert, simd_extract, +- simd_cast, +- simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor, +- +- Unalign, bitcast, +-}; +-use core::{mem,ops}; +- +-#[cfg(any(target_arch = "x86", +- target_arch = "x86_64"))] +-use x86::sse2::common; 
+-#[cfg(any(target_arch = "arm"))] +-use arm::neon::common; +-#[cfg(any(target_arch = "aarch64"))] +-use aarch64::neon::common; +- +-macro_rules! basic_impls { +- ($( +- $name: ident: +- $elem: ident, $bool: ident, $shuffle: ident, $length: expr, $($first: ident),* | $($last: ident),*; +- )*) => { +- $(impl $name { +- /// Create a new instance. +- #[inline] +- pub const fn new($($first: $elem),*, $($last: $elem),*) -> $name { +- $name($($first),*, $($last),*) +- } +- +- /// Create a new instance where every lane has value `x`. +- #[inline] +- pub const fn splat(x: $elem) -> $name { +- $name($({ #[allow(dead_code)] struct $first; x }),*, +- $({ #[allow(dead_code)] struct $last; x }),*) +- } +- +- /// Compare for equality. +- #[inline] +- pub fn eq(self, other: Self) -> $bool { +- unsafe {simd_eq(self, other)} +- } +- /// Compare for equality. +- #[inline] +- pub fn ne(self, other: Self) -> $bool { +- unsafe {simd_ne(self, other)} +- } +- /// Compare for equality. +- #[inline] +- pub fn lt(self, other: Self) -> $bool { +- unsafe {simd_lt(self, other)} +- } +- /// Compare for equality. +- #[inline] +- pub fn le(self, other: Self) -> $bool { +- unsafe {simd_le(self, other)} +- } +- /// Compare for equality. +- #[inline] +- pub fn gt(self, other: Self) -> $bool { +- unsafe {simd_gt(self, other)} +- } +- /// Compare for equality. +- #[inline] +- pub fn ge(self, other: Self) -> $bool { +- unsafe {simd_ge(self, other)} +- } +- +- /// Extract the value of the `idx`th lane of `self`. +- /// +- /// # Panics +- /// +- /// `extract` will panic if `idx` is out of bounds. +- #[inline] +- pub fn extract(self, idx: u32) -> $elem { +- assert!(idx < $length); +- unsafe {simd_extract(self, idx)} +- } +- /// Return a new vector where the `idx`th lane is replaced +- /// by `elem`. +- /// +- /// # Panics +- /// +- /// `replace` will panic if `idx` is out of bounds. 
+- #[inline] +- pub fn replace(self, idx: u32, elem: $elem) -> Self { +- assert!(idx < $length); +- unsafe {simd_insert(self, idx, elem)} +- } +- +- /// Load a new value from the `idx`th position of `array`. +- /// +- /// This is equivalent to the following, but is possibly +- /// more efficient: +- /// +- /// ```rust,ignore +- /// Self::new(array[idx], array[idx + 1], ...) +- /// ``` +- /// +- /// # Panics +- /// +- /// `load` will panic if `idx` is out of bounds in +- /// `array`, or if `array[idx..]` is too short. +- #[inline] +- pub fn load(array: &[$elem], idx: usize) -> Self { +- let data = &array[idx..idx + $length]; +- let loaded = unsafe { +- *(data.as_ptr() as *const Unalign) +- }; +- loaded.0 +- } +- +- /// Store the elements of `self` to `array`, starting at +- /// the `idx`th position. +- /// +- /// This is equivalent to the following, but is possibly +- /// more efficient: +- /// +- /// ```rust,ignore +- /// array[i] = self.extract(0); +- /// array[i + 1] = self.extract(1); +- /// // ... +- /// ``` +- /// +- /// # Panics +- /// +- /// `store` will panic if `idx` is out of bounds in +- /// `array`, or if `array[idx...]` is too short. +- #[inline] +- pub fn store(self, array: &mut [$elem], idx: usize) { +- let place = &mut array[idx..idx + $length]; +- unsafe { +- *(place.as_mut_ptr() as *mut Unalign) = Unalign(self) +- } +- } +- })* +- } +-} +- +-basic_impls! { +- u32x4: u32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3; +- i32x4: i32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3; +- f32x4: f32, bool32fx4, simd_shuffle4, 4, x0, x1 | x2, x3; +- +- u16x8: u16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; +- i16x8: i16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; +- +- u8x16: u8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; +- i8x16: i8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; +-} +- +-macro_rules! 
bool_impls { +- ($( +- $name: ident: +- $elem: ident, $repr: ident, $repr_elem: ident, $length: expr, $all: ident, $any: ident, +- $($first: ident),* | $($last: ident),* +- [$(#[$cvt_meta: meta] $cvt: ident -> $cvt_to: ident),*]; +- )*) => { +- $(impl $name { +- /// Convert to integer representation. +- #[inline] +- pub fn to_repr(self) -> $repr { +- unsafe {mem::transmute(self)} +- } +- /// Convert from integer representation. +- #[inline] +- #[inline] +- pub fn from_repr(x: $repr) -> Self { +- unsafe {mem::transmute(x)} +- } +- +- /// Create a new instance. +- #[inline] +- pub fn new($($first: bool),*, $($last: bool),*) -> $name { +- unsafe { +- // negate everything together +- simd_sub($name::splat(false), +- $name($( ($first as $repr_elem) ),*, +- $( ($last as $repr_elem) ),*)) +- } +- } +- +- /// Create a new instance where every lane has value `x`. +- #[allow(unused_variables)] +- #[inline] +- pub fn splat(x: bool) -> $name { +- let x = if x {!(0 as $repr_elem)} else {0}; +- $name($({ let $first = (); x}),*, +- $({ let $last = (); x}),*) +- } +- +- /// Extract the value of the `idx`th lane of `self`. +- /// +- /// # Panics +- /// +- /// `extract` will panic if `idx` is out of bounds. +- #[inline] +- pub fn extract(self, idx: u32) -> bool { +- assert!(idx < $length); +- unsafe {simd_extract(self.to_repr(), idx) != 0} +- } +- /// Return a new vector where the `idx`th lane is replaced +- /// by `elem`. +- /// +- /// # Panics +- /// +- /// `replace` will panic if `idx` is out of bounds. +- #[inline] +- pub fn replace(self, idx: u32, elem: bool) -> Self { +- assert!(idx < $length); +- let x = if elem {!(0 as $repr_elem)} else {0}; +- unsafe {Self::from_repr(simd_insert(self.to_repr(), idx, x))} +- } +- /// Select between elements of `then` and `else_`, based on +- /// the corresponding element of `self`. 
+- /// +- /// This is equivalent to the following, but is possibly +- /// more efficient: +- /// +- /// ```rust,ignore +- /// T::new(if self.extract(0) { then.extract(0) } else { else_.extract(0) }, +- /// if self.extract(1) { then.extract(1) } else { else_.extract(1) }, +- /// ...) +- /// ``` +- #[inline] +- pub fn select>(self, then: T, else_: T) -> T { +- let then: $repr = bitcast(then); +- let else_: $repr = bitcast(else_); +- bitcast((then & self.to_repr()) | (else_ & (!self).to_repr())) +- } +- +- /// Check if every element of `self` is true. +- /// +- /// This is equivalent to the following, but is possibly +- /// more efficient: +- /// +- /// ```rust,ignore +- /// self.extract(0) && self.extract(1) && ... +- /// ``` +- #[inline] +- pub fn all(self) -> bool { +- common::$all(self) +- } +- /// Check if any element of `self` is true. +- /// +- /// This is equivalent to the following, but is possibly +- /// more efficient: +- /// +- /// ```rust,ignore +- /// self.extract(0) || self.extract(1) || ... +- /// ``` +- #[inline] +- pub fn any(self) -> bool { +- common::$any(self) +- } +- +- $( +- #[$cvt_meta] +- #[inline] +- pub fn $cvt(self) -> $cvt_to { +- bitcast(self) +- } +- )* +- } +- impl ops::Not for $name { +- type Output = Self; +- +- #[inline] +- fn not(self) -> Self { +- Self::from_repr($repr::splat(!(0 as $repr_elem)) ^ self.to_repr()) +- } +- } +- )* +- } +-} +- +-bool_impls! { +- bool32ix4: bool32i, i32x4, i32, 4, bool32ix4_all, bool32ix4_any, x0, x1 | x2, x3 +- [/// Convert `self` to a boolean vector for interacting with floating point vectors. +- to_f -> bool32fx4]; +- bool32fx4: bool32f, i32x4, i32, 4, bool32fx4_all, bool32fx4_any, x0, x1 | x2, x3 +- [/// Convert `self` to a boolean vector for interacting with integer vectors. 
+- to_i -> bool32ix4]; +- +- bool16ix8: bool16i, i16x8, i16, 8, bool16ix8_all, bool16ix8_any, x0, x1, x2, x3 | x4, x5, x6, x7 []; +- +- bool8ix16: bool8i, i8x16, i8, 16, bool8ix16_all, bool8ix16_any, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15 []; +-} +- +-impl u32x4 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i32(self) -> i32x4 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 32-bit float. +- #[inline] +- pub fn to_f32(self) -> f32x4 { +- unsafe {simd_cast(self)} +- } +-} +-impl i32x4 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u32(self) -> u32x4 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 32-bit float. +- #[inline] +- pub fn to_f32(self) -> f32x4 { +- unsafe {simd_cast(self)} +- } +-} +-impl f32x4 { +- /// Compute the square root of each lane. +- #[inline] +- pub fn sqrt(self) -> Self { +- common::f32x4_sqrt(self) +- } +- /// Compute an approximation to the reciprocal of the square root +- /// of `self`, that is, `f32::splat(1.0) / self.sqrt()`. +- /// +- /// The accuracy of this approximation is platform dependent. +- #[inline] +- pub fn approx_rsqrt(self) -> Self { +- common::f32x4_approx_rsqrt(self) +- } +- /// Compute an approximation to the reciprocal of `self`, that is, +- /// `f32::splat(1.0) / self`. +- /// +- /// The accuracy of this approximation is platform dependent. +- #[inline] +- pub fn approx_reciprocal(self) -> Self { +- common::f32x4_approx_reciprocal(self) +- } +- /// Compute the lane-wise maximum of `self` and `other`. +- /// +- /// This is equivalent to the following, but is possibly more +- /// efficient: +- /// +- /// ```rust,ignore +- /// f32x4::new(self.extract(0).max(other.extract(0)), +- /// self.extract(1).max(other.extract(1)), +- /// ...) +- /// ``` +- #[inline] +- pub fn max(self, other: Self) -> Self { +- common::f32x4_max(self, other) +- } +- /// Compute the lane-wise minimum of `self` and `other`. 
+- /// +- /// This is equivalent to the following, but is possibly more +- /// efficient: +- /// +- /// ```rust,ignore +- /// f32x4::new(self.extract(0).min(other.extract(0)), +- /// self.extract(1).min(other.extract(1)), +- /// ...) +- /// ``` +- #[inline] +- pub fn min(self, other: Self) -> Self { +- common::f32x4_min(self, other) +- } +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i32(self) -> i32x4 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u32(self) -> u32x4 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl i16x8 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u16(self) -> u16x8 { +- unsafe {simd_cast(self)} +- } +-} +-impl u16x8 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i16(self) -> i16x8 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl i8x16 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u8(self) -> u8x16 { +- unsafe {simd_cast(self)} +- } +-} +-impl u8x16 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i8(self) -> i8x16 { +- unsafe {simd_cast(self)} +- } +-} +- +- +-macro_rules! neg_impls { +- ($zero: expr, $($ty: ident,)*) => { +- $(impl ops::Neg for $ty { +- type Output = Self; +- fn neg(self) -> Self { +- $ty::splat($zero) - self +- } +- })* +- } +-} +-neg_impls!{ +- 0, +- i32x4, +- i16x8, +- i8x16, +-} +-neg_impls! { +- 0.0, +- f32x4, +-} +-macro_rules! not_impls { +- ($($ty: ident,)*) => { +- $(impl ops::Not for $ty { +- type Output = Self; +- fn not(self) -> Self { +- $ty::splat(!0) ^ self +- } +- })* +- } +-} +-not_impls! { +- i32x4, +- i16x8, +- i8x16, +- u32x4, +- u16x8, +- u8x16, +-} +- +-macro_rules! 
operators { +- ($($trayt: ident ($func: ident, $method: ident): $($ty: ty),*;)*) => { +- $( +- $(impl ops::$trayt for $ty { +- type Output = Self; +- #[inline] +- fn $method(self, x: Self) -> Self { +- unsafe {$func(self, x)} +- } +- })* +- )* +- } +-} +-operators! { +- Add (simd_add, add): +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, +- f32x4; +- Sub (simd_sub, sub): +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, +- f32x4; +- Mul (simd_mul, mul): +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, +- f32x4; +- Div (simd_div, div): f32x4; +- +- BitAnd (simd_and, bitand): +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, +- bool8ix16, bool16ix8, bool32ix4, +- bool32fx4; +- BitOr (simd_or, bitor): +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, +- bool8ix16, bool16ix8, bool32ix4, +- bool32fx4; +- BitXor (simd_xor, bitxor): +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, +- bool8ix16, bool16ix8, bool32ix4, +- bool32fx4; +-} +- +-macro_rules! shift_one { +- ($ty: ident, $($by: ident),*) => { +- $( +- impl ops::Shl<$by> for $ty { +- type Output = Self; +- #[inline] +- fn shl(self, other: $by) -> Self { +- unsafe { simd_shl(self, $ty::splat(other as <$ty as Simd>::Elem)) } +- } +- } +- impl ops::Shr<$by> for $ty { +- type Output = Self; +- #[inline] +- fn shr(self, other: $by) -> Self { +- unsafe {simd_shr(self, $ty::splat(other as <$ty as Simd>::Elem))} +- } +- } +- )* +- } +-} +- +-macro_rules! shift { +- ($($ty: ident),*) => { +- $(shift_one! { +- $ty, +- u8, u16, u32, u64, usize, +- i8, i16, i32, i64, isize +- })* +- } +-} +-shift! { +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4 +-} +diff --git a/third_party/rust/simd/src/lib.rs b/third_party/rust/simd/src/lib.rs +deleted file mode 100644 +index e8fb1b16f53b..000000000000 +--- a/third_party/rust/simd/src/lib.rs ++++ /dev/null +@@ -1,804 +0,0 @@ +-//! `simd` offers a basic interface to the SIMD functionality of CPUs. 
+-#![no_std] +- +-#![feature(cfg_target_feature, repr_simd, platform_intrinsics, const_fn)] +-#![allow(non_camel_case_types)] +- +-#[cfg(feature = "with-serde")] +-extern crate serde; +-#[cfg(feature = "with-serde")] +-#[macro_use] +-extern crate serde_derive; +- +-use core::mem; +- +-/// Boolean type for 8-bit integers. +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +-pub struct bool8i(i8); +-/// Boolean type for 16-bit integers. +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +-pub struct bool16i(i16); +-/// Boolean type for 32-bit integers. +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +-pub struct bool32i(i32); +-/// Boolean type for 32-bit floats. +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +-pub struct bool32f(i32); +- +-macro_rules! bool { +- ($($name: ident, $inner: ty;)*) => { +- $( +- impl From for $name { +- #[inline] +- fn from(b: bool) -> $name { +- $name(-(b as $inner)) +- } +- } +- impl From<$name> for bool { +- #[inline] +- fn from(b: $name) -> bool { +- b.0 != 0 +- } +- } +- )* +- } +-} +-bool! { +- bool8i, i8; +- bool16i, i16; +- bool32i, i32; +- bool32f, i32; +-} +- +-/// Types that are SIMD vectors. +-pub unsafe trait Simd { +- /// The corresponding boolean vector type. +- type Bool: Simd; +- /// The element that this vector stores. +- type Elem; +-} +- +-/// A SIMD vector of 4 `u32`s. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u32x4(u32, u32, u32, u32); +-/// A SIMD vector of 4 `i32`s. 
+-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i32x4(i32, i32, i32, i32); +-/// A SIMD vector of 4 `f32`s. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct f32x4(f32, f32, f32, f32); +-/// A SIMD boolean vector for length-4 vectors of 32-bit integers. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool32ix4(i32, i32, i32, i32); +-/// A SIMD boolean vector for length-4 vectors of 32-bit floats. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool32fx4(i32, i32, i32, i32); +- +-#[allow(dead_code)] +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-struct u32x2(u32, u32); +-#[allow(dead_code)] +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-struct i32x2(i32, i32); +-#[allow(dead_code)] +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-struct f32x2(f32, f32); +-#[allow(dead_code)] +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-struct bool32ix2(i32, i32); +-#[allow(dead_code)] +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-struct bool32fx2(i32, i32); +- +-/// A SIMD vector of 8 `u16`s. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u16x8(u16, u16, u16, u16, +- u16, u16, u16, u16); +-/// A SIMD vector of 8 `i16`s. 
+-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i16x8(i16, i16, i16, i16, +- i16, i16, i16, i16); +-/// A SIMD boolean vector for length-8 vectors of 16-bit integers. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool16ix8(i16, i16, i16, i16, +- i16, i16, i16, i16); +- +-/// A SIMD vector of 16 `u8`s. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u8x16(u8, u8, u8, u8, u8, u8, u8, u8, +- u8, u8, u8, u8, u8, u8, u8, u8); +-/// A SIMD vector of 16 `i8`s. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i8x16(i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8); +-/// A SIMD boolean vector for length-16 vectors of 8-bit integers. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool8ix16(i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8); +- +- +-macro_rules! simd { +- ($($bool: ty: $($ty: ty = $elem: ty),*;)*) => { +- $($(unsafe impl Simd for $ty { +- type Bool = $bool; +- type Elem = $elem; +- } +- impl Clone for $ty { #[inline] fn clone(&self) -> Self { *self } } +- )*)*} +-} +-simd! 
{ +- bool8ix16: i8x16 = i8, u8x16 = u8, bool8ix16 = bool8i; +- bool16ix8: i16x8 = i16, u16x8 = u16, bool16ix8 = bool16i; +- bool32ix4: i32x4 = i32, u32x4 = u32, bool32ix4 = bool32i; +- bool32fx4: f32x4 = f32, bool32fx4 = bool32f; +- +- bool32ix2: i32x2 = i32, u32x2 = u32, bool32ix2 = bool32i; +- bool32fx2: f32x2 = f32, bool32fx2 = bool32f; +-} +- +-#[allow(dead_code)] +-#[inline] +-fn bitcast(x: T) -> U { +- assert_eq!(mem::size_of::(), +- mem::size_of::()); +- unsafe {mem::transmute_copy(&x)} +-} +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn simd_eq, U>(x: T, y: T) -> U; +- fn simd_ne, U>(x: T, y: T) -> U; +- fn simd_lt, U>(x: T, y: T) -> U; +- fn simd_le, U>(x: T, y: T) -> U; +- fn simd_gt, U>(x: T, y: T) -> U; +- fn simd_ge, U>(x: T, y: T) -> U; +- +- fn simd_shuffle2>(x: T, y: T, idx: [u32; 2]) -> U; +- fn simd_shuffle4>(x: T, y: T, idx: [u32; 4]) -> U; +- fn simd_shuffle8>(x: T, y: T, idx: [u32; 8]) -> U; +- fn simd_shuffle16>(x: T, y: T, idx: [u32; 16]) -> U; +- +- fn simd_insert, U>(x: T, idx: u32, val: U) -> T; +- fn simd_extract, U>(x: T, idx: u32) -> U; +- +- fn simd_cast(x: T) -> U; +- +- fn simd_add(x: T, y: T) -> T; +- fn simd_sub(x: T, y: T) -> T; +- fn simd_mul(x: T, y: T) -> T; +- fn simd_div(x: T, y: T) -> T; +- fn simd_shl(x: T, y: T) -> T; +- fn simd_shr(x: T, y: T) -> T; +- fn simd_and(x: T, y: T) -> T; +- fn simd_or(x: T, y: T) -> T; +- fn simd_xor(x: T, y: T) -> T; +-} +-#[repr(packed)] +-#[derive(Copy)] +-struct Unalign(T); +- +-impl Clone for Unalign { +- fn clone(&self) -> Unalign { +- Unalign(unsafe { self.0.clone() }) +- } +-} +- +-#[macro_use] +-mod common; +-mod sixty_four; +-mod v256; +- +-#[cfg(any(feature = "doc", +- target_arch = "x86", +- target_arch = "x86_64"))] +-pub mod x86; +-#[cfg(any(feature = "doc", target_arch = "arm"))] +-pub mod arm; +-#[cfg(any(feature = "doc", target_arch = "aarch64"))] +-pub mod aarch64; +- +-#[cfg(test)] +-mod tests { +- +- use super::u8x16; +- use super::u16x8; +- use super::u32x4; 
+- use super::f32x4; +- +- #[test] +- fn test_u8x16_none_not_any() { +- let x1 = u8x16::splat(1); +- let x2 = u8x16::splat(2); +- assert!(!(x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_none_not_all() { +- let x1 = u8x16::splat(1); +- let x2 = u8x16::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_all_any() { +- let x1 = u8x16::splat(1); +- let x2 = u8x16::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_all_all() { +- let x1 = u8x16::splat(1); +- let x2 = u8x16::splat(1); +- assert!((x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_except_last_any() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u8x16::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_except_last_not_all() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u8x16::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_except_first_any() { +- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_except_first_not_all() { +- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_only_last_any() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u8x16::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_only_last_not_all() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u8x16::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_only_first_any() { +- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_only_first_not_all() { +- let x1 = u8x16::new(1, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_except_thirteenth_any() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); +- let x2 = u8x16::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_except_thirteenth_not_all() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); +- let x2 = u8x16::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_except_fifth_any() { +- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_except_fifth_not_all() { +- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_only_thirteenth_any() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); +- let x2 = u8x16::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_only_thirteenth_not_all() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); +- let x2 = u8x16::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_only_fifth_any() { +- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_only_fifth_not_all() { +- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_none_not_any() { +- let x1 = u16x8::splat(1); +- let x2 = u16x8::splat(2); +- assert!(!(x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_none_not_all() { +- let x1 = u16x8::splat(1); +- let x2 = u16x8::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_all_any() { +- let x1 = 
u16x8::splat(1); +- let x2 = u16x8::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_all_all() { +- let x1 = u16x8::splat(1); +- let x2 = u16x8::splat(1); +- assert!((x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_except_last_any() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u16x8::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_except_last_not_all() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u16x8::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_except_first_any() { +- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_except_first_not_all() { +- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_only_last_any() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u16x8::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_only_last_not_all() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u16x8::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_only_first_any() { +- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_only_first_not_all() { +- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_except_sixth_any() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); +- let x2 = u16x8::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_except_sixth_not_all() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); +- let x2 = u16x8::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_except_third_any() { +- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); +- let x2 = 
u16x8::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_except_third_not_all() { +- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_only_sixth_any() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); +- let x2 = u16x8::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_only_sixth_not_all() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); +- let x2 = u16x8::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_only_third_any() { +- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_only_third_not_all() { +- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_none_not_any() { +- let x1 = u32x4::splat(1); +- let x2 = u32x4::splat(2); +- assert!(!(x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_none_not_all() { +- let x1 = u32x4::splat(1); +- let x2 = u32x4::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_all_any() { +- let x1 = u32x4::splat(1); +- let x2 = u32x4::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_all_all() { +- let x1 = u32x4::splat(1); +- let x2 = u32x4::splat(1); +- assert!((x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_except_last_any() { +- let x1 = u32x4::new(2, 2, 2, 1); +- let x2 = u32x4::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_except_last_not_all() { +- let x1 = u32x4::new(2, 2, 2, 1); +- let x2 = u32x4::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_except_first_any() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_except_first_not_all() { +- let x1 = u32x4::new(1, 2, 2, 2); 
+- let x2 = u32x4::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_only_last_any() { +- let x1 = u32x4::new(2, 2, 2, 1); +- let x2 = u32x4::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_only_last_not_all() { +- let x1 = u32x4::new(2, 2, 2, 1); +- let x2 = u32x4::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_only_first_any() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_only_first_not_all() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_except_second_any() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_except_second_not_all() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_except_third_any() { +- let x1 = u32x4::new(2, 2, 1, 2); +- let x2 = u32x4::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_except_third_not_all() { +- let x1 = u32x4::new(2, 2, 1, 2); +- let x2 = u32x4::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_only_second_any() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_only_second_not_all() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_only_third_any() { +- let x1 = u32x4::new(2, 2, 1, 2); +- let x2 = u32x4::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_only_third_not_all() { +- let x1 = u32x4::new(2, 2, 1, 2); +- let x2 = u32x4::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_none_not_any() { +- let x1 = f32x4::splat(1.0); +- 
let x2 = f32x4::splat(2.0); +- assert!(!(x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_none_not_all() { +- let x1 = f32x4::splat(1.0); +- let x2 = f32x4::splat(2.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_all_any() { +- let x1 = f32x4::splat(1.0); +- let x2 = f32x4::splat(1.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_all_all() { +- let x1 = f32x4::splat(1.0); +- let x2 = f32x4::splat(1.0); +- assert!((x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_except_last_any() { +- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); +- let x2 = f32x4::splat(2.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_except_last_not_all() { +- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); +- let x2 = f32x4::splat(2.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_except_first_any() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(2.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_except_first_not_all() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(2.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_only_last_any() { +- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); +- let x2 = f32x4::splat(1.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_only_last_not_all() { +- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); +- let x2 = f32x4::splat(1.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_only_first_any() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(1.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_only_first_not_all() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(1.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_except_second_any() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(2.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn 
test_f32x4_except_second_not_all() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(2.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_except_third_any() { +- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); +- let x2 = f32x4::splat(2.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_except_third_not_all() { +- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); +- let x2 = f32x4::splat(2.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_only_second_any() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(1.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_only_second_not_all() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(1.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_only_third_any() { +- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); +- let x2 = f32x4::splat(1.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_only_third_not_all() { +- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); +- let x2 = f32x4::splat(1.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +-} +diff --git a/third_party/rust/simd/src/sixty_four.rs b/third_party/rust/simd/src/sixty_four.rs +deleted file mode 100644 +index a87f44a77ee7..000000000000 +--- a/third_party/rust/simd/src/sixty_four.rs ++++ /dev/null +@@ -1,228 +0,0 @@ +-#![allow(dead_code)] +-use super::*; +-#[allow(unused_imports)] +-use super::{ +- f32x2, +- simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge, +- simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16, +- simd_insert, simd_extract, +- simd_cast, +- simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor, +- +- Unalign, bitcast, +-}; +-use core::{mem,ops}; +- +-/// Boolean type for 64-bit integers. +-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy, Clone)] +-pub struct bool64i(i64); +-/// Boolean type for 64-bit floats. 
+-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy, Clone)] +-pub struct bool64f(i64); +-/// A SIMD vector of 2 `u64`s. +-#[repr(simd)] +-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u64x2(u64, u64); +-/// A SIMD vector of 2 `i64`s. +-#[repr(simd)] +-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i64x2(i64, i64); +-/// A SIMD vector of 2 `f64`s. +-#[repr(simd)] +-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct f64x2(f64, f64); +-/// A SIMD boolean vector for length-2 vectors of 64-bit integers. +-#[repr(simd)] +-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool64ix2(i64, i64); +-/// A SIMD boolean vector for length-2 vectors of 64-bit floats. +-#[repr(simd)] +-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool64fx2(i64, i64); +- +-simd! { +- bool64ix2: i64x2 = i64, u64x2 = u64, bool64ix2 = bool64i; +- bool64fx2: f64x2 = f64, bool64fx2 = bool64f; +-} +-basic_impls! { +- u64x2: u64, bool64ix2, simd_shuffle2, 2, x0 | x1; +- i64x2: i64, bool64ix2, simd_shuffle2, 2, x0 | x1; +- f64x2: f64, bool64fx2, simd_shuffle2, 2, x0 | x1; +-} +- +-mod common { +- use super::*; +- // naive for now +- #[inline] +- pub fn bool64ix2_all(x: bool64ix2) -> bool { +- x.0 != 0 && x.1 != 0 +- } +- #[inline] +- pub fn bool64ix2_any(x: bool64ix2) -> bool { +- x.0 != 0 || x.1 != 0 +- } +- #[inline] +- pub fn bool64fx2_all(x: bool64fx2) -> bool { +- x.0 != 0 && x.1 != 0 +- } +- #[inline] +- pub fn bool64fx2_any(x: bool64fx2) -> bool { +- x.0 != 0 || x.1 != 0 +- }} +-bool_impls! { +- bool64ix2: bool64i, i64x2, i64, 2, bool64ix2_all, bool64ix2_any, x0 | x1 +- [/// Convert `self` to a boolean vector for interacting with floating point vectors. 
+- to_f -> bool64fx2]; +- +- bool64fx2: bool64f, i64x2, i64, 2, bool64fx2_all, bool64fx2_any, x0 | x1 +- [/// Convert `self` to a boolean vector for interacting with integer vectors. +- to_i -> bool64ix2]; +-} +- +-impl u64x2 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i64(self) -> i64x2 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 64-bit float. +- #[inline] +- pub fn to_f64(self) -> f64x2 { +- unsafe {simd_cast(self)} +- } +-} +-impl i64x2 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u64(self) -> u64x2 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 64-bit float. +- #[inline] +- pub fn to_f64(self) -> f64x2 { +- unsafe {simd_cast(self)} +- } +-} +-impl f64x2 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i64(self) -> i64x2 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u64(self) -> u64x2 { +- unsafe {simd_cast(self)} +- } +- +- /// Convert each lane to a 32-bit float. +- #[inline] +- pub fn to_f32(self) -> f32x4 { +- unsafe { +- let x: f32x2 = simd_cast(self); +- f32x4::new(x.0, x.1, 0.0, 0.0) +- } +- } +-} +- +-neg_impls!{ +- 0, +- i64x2, +-} +-neg_impls! { +- 0.0, +- f64x2, +-} +-macro_rules! not_impls { +- ($($ty: ident,)*) => { +- $(impl ops::Not for $ty { +- type Output = Self; +- fn not(self) -> Self { +- $ty::splat(!0) ^ self +- } +- })* +- } +-} +-not_impls! { +- i64x2, +- u64x2, +-} +- +-macro_rules! operators { +- ($($trayt: ident ($func: ident, $method: ident): $($ty: ty),*;)*) => { +- $( +- $(impl ops::$trayt for $ty { +- type Output = Self; +- #[inline] +- fn $method(self, x: Self) -> Self { +- unsafe {$func(self, x)} +- } +- })* +- )* +- } +-} +-operators! 
{ +- Add (simd_add, add): +- i64x2, u64x2, +- f64x2; +- Sub (simd_sub, sub): +- i64x2, u64x2, +- f64x2; +- Mul (simd_mul, mul): +- i64x2, u64x2, +- f64x2; +- Div (simd_div, div): f64x2; +- +- BitAnd (simd_and, bitand): +- i64x2, u64x2, +- bool64ix2, +- bool64fx2; +- BitOr (simd_or, bitor): +- i64x2, u64x2, +- bool64ix2, +- bool64fx2; +- BitXor (simd_xor, bitxor): +- i64x2, u64x2, +- bool64ix2, +- bool64fx2; +-} +- +-macro_rules! shift_one { ($ty: ident, $($by: ident),*) => { +- $( +- impl ops::Shl<$by> for $ty { +- type Output = Self; +- #[inline] +- fn shl(self, other: $by) -> Self { +- unsafe { simd_shl(self, $ty::splat(other as <$ty as Simd>::Elem)) } +- } +- } +- impl ops::Shr<$by> for $ty { +- type Output = Self; +- #[inline] +- fn shr(self, other: $by) -> Self { +- unsafe {simd_shr(self, $ty::splat(other as <$ty as Simd>::Elem))} +- } +- } +- )* +- } +-} +- +-macro_rules! shift { +- ($($ty: ident),*) => { +- $(shift_one! { +- $ty, +- u8, u16, u32, u64, usize, +- i8, i16, i32, i64, isize +- })* +- } +-} +-shift! 
{ +- i64x2, u64x2 +-} +diff --git a/third_party/rust/simd/src/v256.rs b/third_party/rust/simd/src/v256.rs +deleted file mode 100644 +index 519eb14e7259..000000000000 +--- a/third_party/rust/simd/src/v256.rs ++++ /dev/null +@@ -1,436 +0,0 @@ +-#![allow(dead_code)] +-use core::{mem,ops}; +-#[allow(unused_imports)] +-use super::{ +- Simd, +- u32x4, i32x4, u16x8, i16x8, u8x16, i8x16, f32x4, +- bool32ix4, bool16ix8, bool8ix16, bool32fx4, +- simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge, +- simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16, +- simd_insert, simd_extract, +- simd_cast, +- simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor, +- bool8i, bool16i, bool32i, bool32f, +- Unalign, bitcast, +-}; +-use super::sixty_four::*; +-#[cfg(all(target_feature = "avx"))] +-use super::x86::avx::common; +- +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u64x4(u64, u64, u64, u64); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i64x4(i64, i64, i64, i64); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct f64x4(f64, f64, f64, f64); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool64ix4(i64, i64, i64, i64); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool64fx4(i64, i64, i64, i64); +- +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u32x8(u32, u32, u32, u32, +- u32, u32, u32, u32); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i32x8(i32, i32, i32, i32, +- i32, i32, i32, i32); +-#[repr(simd)] 
+-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct f32x8(f32, f32, f32, f32, +- f32, f32, f32, f32); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool32ix8(i32, i32, i32, i32, +- i32, i32, i32, i32);#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool32fx8(i32, i32, i32, i32, +- i32, i32, i32, i32); +- +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u16x16(u16, u16, u16, u16, u16, u16, u16, u16, +- u16, u16, u16, u16, u16, u16, u16, u16); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i16x16(i16, i16, i16, i16, i16, i16, i16, i16, +- i16, i16, i16, i16, i16, i16, i16, i16); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool16ix16(i16, i16, i16, i16, i16, i16, i16, i16, +- i16, i16, i16, i16, i16, i16, i16, i16); +- +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u8x32(u8, u8, u8, u8, u8, u8, u8, u8, +- u8, u8, u8, u8, u8, u8, u8, u8, +- u8, u8, u8, u8, u8, u8, u8, u8, +- u8, u8, u8, u8, u8, u8, u8, u8); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i8x32(i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool8ix32(i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8); +- +-simd! 
{ +- bool8ix32: i8x32 = i8, u8x32 = u8, bool8ix32 = bool8i; +- bool16ix16: i16x16 = i16, u16x16 = u16, bool16ix16 = bool16i; +- bool32ix8: i32x8 = i32, u32x8 = u32, bool32ix8 = bool32i; +- bool64ix4: i64x4 = i64, u64x4 = u64, bool64ix4 = bool64i; +- +- bool32fx8: f32x8 = f32, bool32fx8 = bool32f; +- bool64fx4: f64x4 = f64, bool64fx4 = bool64f; +-} +- +-basic_impls! { +- u64x4: u64, bool64ix4, simd_shuffle4, 4, x0, x1 | x2, x3; +- i64x4: i64, bool64ix4, simd_shuffle4, 4, x0, x1 | x2, x3; +- f64x4: f64, bool64fx4, simd_shuffle4, 4, x0, x1 | x2, x3; +- +- u32x8: u32, bool32ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; +- i32x8: i32, bool32ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; +- f32x8: f32, bool32fx8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; +- +- u16x16: u16, bool16ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; +- i16x16: i16, bool16ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; +- +- u8x32: u8, bool8ix32, simd_shuffle32, 32, x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | +- x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31; +- i8x32: i8, bool8ix32, simd_shuffle32, 32, x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | +- x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31; +-} +- +-#[cfg(all(not(target_feature = "avx")))] +-#[doc(hidden)] +-mod common { +- use super::*; +- // implementation via SSE vectors +- macro_rules! bools { +- ($($ty: ty, $all: ident, $any: ident;)*) => { +- $( +- #[inline] +- pub fn $all(x: $ty) -> bool { +- x.low().all() && x.high().all() +- } +- #[inline] +- pub fn $any(x: $ty) -> bool { +- x.low().any() || x.high().any() +- } +- )* +- } +- } +- +- bools! 
{ +- bool64ix4, bool64ix4_all, bool64ix4_any; +- bool64fx4, bool64fx4_all, bool64fx4_any; +- bool32ix8, bool32ix8_all, bool32ix8_any; +- bool32fx8, bool32fx8_all, bool32fx8_any; +- bool16ix16, bool16ix16_all, bool16ix16_any; +- bool8ix32, bool8ix32_all, bool8ix32_any; +- } +- +-} +- +-bool_impls! { +- bool64ix4: bool64i, i64x4, i64, 4, bool64ix4_all, bool64ix4_any, x0, x1 | x2, x3 +- [/// Convert `self` to a boolean vector for interacting with floating point vectors. +- to_f -> bool64fx4]; +- +- bool64fx4: bool64f, i64x4, i64, 4, bool64fx4_all, bool64fx4_any, x0, x1 | x2, x3 +- [/// Convert `self` to a boolean vector for interacting with integer vectors. +- to_i -> bool64ix4]; +- +- bool32ix8: bool32i, i32x8, i32, 8, bool32ix8_all, bool32ix8_any, x0, x1, x2, x3 | x4, x5, x6, x7 +- [/// Convert `self` to a boolean vector for interacting with floating point vectors. +- to_f -> bool32fx8]; +- +- bool32fx8: bool32f, i32x8, i32, 8, bool32fx8_all, bool32fx8_any, x0, x1, x2, x3 | x4, x5, x6, x7 +- [/// Convert `self` to a boolean vector for interacting with integer vectors. +- to_i -> bool32ix8]; +- +- bool16ix16: bool16i, i16x16, i16, 16, bool16ix16_all, bool16ix16_any, +- x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15 []; +- +- bool8ix32: bool8i, i8x32, i8, 32, bool8ix32_all, bool8ix32_any, +- x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | +- x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 []; +-} +- +-pub trait LowHigh128 { +- type Half: Simd; +- /// Extract the low 128 bit part. +- fn low(self) -> Self::Half; +- /// Extract the high 128 bit part. +- fn high(self) -> Self::Half; +-} +- +-macro_rules! expr { ($x:expr) => ($x) } // HACK +-macro_rules! low_high_impls { +- ($( +- $name: ident, $half: ident, $($first: tt),+ ... 
$($last: tt),+; +- )*) => { +- $(impl LowHigh128 for $name { +- type Half = $half; +- #[inline] +- fn low(self) -> Self::Half { +- $half::new($( expr!(self.$first), )*) +- } +- +- #[inline] +- fn high(self) -> Self::Half { +- $half::new($( expr!(self.$last), )*) +- } +- })* +- } +-} +- +-low_high_impls! { +- u64x4, u64x2, 0, 1 ... 2, 3; +- i64x4, i64x2, 0, 1 ... 2, 3; +- f64x4, f64x2, 0, 1 ... 2, 3; +- +- u32x8, u32x4, 0, 1, 2, 3 ... 4, 5, 6, 7; +- i32x8, i32x4, 0, 1, 2, 3 ... 4, 5, 6, 7; +- f32x8, f32x4, 0, 1, 2, 3 ... 4, 5, 6, 7; +- +- u16x16, u16x8, 0, 1, 2, 3, 4, 5, 6, 7 ... 8, 9, 10, 11, 12, 13, 14, 15; +- i16x16, i16x8, 0, 1, 2, 3, 4, 5, 6, 7 ... 8, 9, 10, 11, 12, 13, 14, 15; +- +- u8x32, u8x16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ... +- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31; +- i8x32, i8x16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ... +- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31; +- +-} +- +-macro_rules! bool_low_high_impls { +- ($( +- $name: ident: $half: ident; +- )*) => { +- $(impl LowHigh128 for $name { +- type Half = $half; +- /// Extract the low 128 bit part. +- #[inline] +- fn low(self) -> Self::Half { +- Self::Half::from_repr(self.to_repr().low()) +- } +- +- /// Extract the high 128 bit part. +- #[inline] +- fn high(self) -> Self::Half { +- Self::Half::from_repr(self.to_repr().high()) +- } +- })* +- } +-} +- +-bool_low_high_impls! { +- bool64fx4: bool64fx2; +- bool32fx8: bool32fx4; +- +- bool64ix4: bool64ix2; +- bool32ix8: bool32ix4; +- bool16ix16: bool16ix8; +- bool8ix32: bool8ix16; +-} +- +-impl u64x4 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i64(self) -> i64x4 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 64-bit float. +- #[inline] +- pub fn to_f64(self) -> f64x4 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl i64x4 { +- /// Convert each lane to an unsigned integer. 
+- #[inline] +- pub fn to_u64(self) -> u64x4 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 64-bit float. +- #[inline] +- pub fn to_f64(self) -> f64x4 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl f64x4 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i64(self) -> i64x4 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u64(self) -> u64x4 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl u32x8 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i32(self) -> i32x8 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 32-bit float. +- #[inline] +- pub fn to_f32(self) -> f32x8 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl i32x8 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u32(self) -> u32x8 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 32-bit float. +- #[inline] +- pub fn to_f32(self) -> f32x8 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl f32x8 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i32(self) -> i32x8 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u32(self) -> u32x8 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl i16x16 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u16(self) -> u16x16 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl u16x16 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i16(self) -> i16x16 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl i8x32 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u8(self) -> u8x32 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl u8x32 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i8(self) -> i8x32 { +- unsafe {simd_cast(self)} +- } +-} +- +-operators! 
{ +- Add (simd_add, add): +- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, +- f64x4, f32x8; +- Sub (simd_sub, sub): +- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, +- f64x4, f32x8; +- Mul (simd_mul, mul): +- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, +- f64x4, f32x8; +- Div (simd_div, div): f64x4, f32x8; +- +- BitAnd (simd_and, bitand): +- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, +- bool64ix4, bool32ix8, bool16ix16, +- bool64fx4, bool32fx8; +- BitOr (simd_or, bitor): +- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, +- bool64ix4, bool32ix8, bool16ix16, +- bool64fx4, bool32fx8; +- BitXor (simd_xor, bitxor): +- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, +- bool64ix4, bool32ix8, bool16ix16, +- bool64fx4, bool32fx8; +-} +- +-neg_impls!{ +- 0, +- i64x4, +- i32x8, +- i16x16, +- i8x32, +-} +- +-neg_impls! { +- 0.0, +- f64x4, +- f32x8, +-} +- +-not_impls! { +- i64x4, +- u64x4, +- i32x8, +- u32x8, +- i16x16, +- u16x16, +- i8x32, +- u8x32, +-} +- +-shift! 
{ +- i64x4, +- u64x4, +- i32x8, +- u32x8, +- i16x16, +- u16x16, +- i8x32, +- u8x32 +-} +diff --git a/third_party/rust/simd/src/x86/avx.rs b/third_party/rust/simd/src/x86/avx.rs +deleted file mode 100644 +index 180247e36561..000000000000 +--- a/third_party/rust/simd/src/x86/avx.rs ++++ /dev/null +@@ -1,290 +0,0 @@ +-use super::super::*; +-use sixty_four::*; +- +-use super::super::bitcast; +- +-pub use v256::{ +- f64x4, bool64fx4, u64x4, i64x4, bool64ix4, +- f32x8, bool32fx8, u32x8, i32x8, bool32ix8, +- u16x16, i16x16, bool16ix16, +- u8x32, i8x32, bool8ix32, +- LowHigh128 +-}; +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn x86_mm256_addsub_ps(x: f32x8, y: f32x8) -> f32x8; +- fn x86_mm256_addsub_pd(x: f64x4, y: f64x4) -> f64x4; +- fn x86_mm256_dp_ps(x: f32x8, y: f32x8, z: i32) -> f32x8; +- fn x86_mm256_hadd_ps(x: f32x8, y: f32x8) -> f32x8; +- fn x86_mm256_hadd_pd(x: f64x4, y: f64x4) -> f64x4; +- fn x86_mm256_hsub_ps(x: f32x8, y: f32x8) -> f32x8; +- fn x86_mm256_hsub_pd(x: f64x4, y: f64x4) -> f64x4; +- fn x86_mm256_max_ps(x: f32x8, y: f32x8) -> f32x8; +- fn x86_mm256_max_pd(x: f64x4, y: f64x4) -> f64x4; +- fn x86_mm256_min_ps(x: f32x8, y: f32x8) -> f32x8; +- fn x86_mm256_min_pd(x: f64x4, y: f64x4) -> f64x4; +- fn x86_mm256_movemask_ps(x: f32x8) -> i32; +- fn x86_mm256_movemask_pd(x: f64x4) -> i32; +- fn x86_mm_permutevar_ps(x: f32x4, y: i32x4) -> f32x4; +- fn x86_mm_permutevar_pd(x: f64x2, y: i64x2) -> f64x2; +- fn x86_mm256_permutevar_ps(x: f32x8, y: i32x8) -> f32x8; +- fn x86_mm256_permutevar_pd(x: f64x4, y: i64x4) -> f64x4; +- fn x86_mm256_rcp_ps(x: f32x8) -> f32x8; +- fn x86_mm256_rsqrt_ps(x: f32x8) -> f32x8; +- fn x86_mm256_sqrt_ps(x: f32x8) -> f32x8; +- fn x86_mm256_sqrt_pd(x: f64x4) -> f64x4; +- fn x86_mm_testc_ps(x: f32x4, y: f32x4) -> i32; +- fn x86_mm256_testc_ps(x: f32x8, y: f32x8) -> i32; +- fn x86_mm_testc_pd(x: f64x2, y: f64x2) -> i32; +- fn x86_mm256_testc_pd(x: f64x4, y: f64x4) -> i32; +- fn x86_mm256_testc_si256(x: u64x4, y: u64x4) -> 
i32; +- fn x86_mm_testnzc_ps(x: f32x4, y: f32x4) -> i32; +- fn x86_mm256_testnzc_ps(x: f32x8, y: f32x8) -> i32; +- fn x86_mm_testnzc_pd(x: f64x2, y: f64x2) -> i32; +- fn x86_mm256_testnzc_pd(x: f64x4, y: f64x4) -> i32; +- fn x86_mm256_testnzc_si256(x: u64x4, y: u64x4) -> i32; +- fn x86_mm_testz_ps(x: f32x4, y: f32x4) -> i32; +- fn x86_mm256_testz_ps(x: f32x8, y: f32x8) -> i32; +- fn x86_mm_testz_pd(x: f64x2, y: f64x2) -> i32; +- fn x86_mm256_testz_pd(x: f64x4, y: f64x4) -> i32; +- fn x86_mm256_testz_si256(x: u64x4, y: u64x4) -> i32; +-} +- +-#[doc(hidden)] +-pub mod common { +- use super::*; +- use core::mem; +- +- macro_rules! bools { +- ($($ty: ty, $all: ident, $any: ident, $testc: ident, $testz: ident;)*) => { +- $( +- #[inline] +- pub fn $all(x: $ty) -> bool { +- unsafe { +- super::$testc(mem::transmute(x), mem::transmute(<$ty>::splat(true))) != 0 +- } +- } +- #[inline] +- pub fn $any(x: $ty) -> bool { +- unsafe { +- super::$testz(mem::transmute(x), mem::transmute(x)) == 0 +- } +- } +- )* +- } +- } +- +- bools! 
{ +- bool32fx8, bool32fx8_all, bool32fx8_any, x86_mm256_testc_ps, x86_mm256_testz_ps; +- bool64fx4, bool64fx4_all, bool64fx4_any, x86_mm256_testc_pd, x86_mm256_testz_pd; +- bool8ix32, bool8ix32_all, bool8ix32_any, x86_mm256_testc_si256, x86_mm256_testz_si256; +- bool16ix16, bool16ix16_all, bool16ix16_any, x86_mm256_testc_si256, x86_mm256_testz_si256; +- bool32ix8, bool32ix8_all, bool32ix8_any, x86_mm256_testc_si256, x86_mm256_testz_si256; +- bool64ix4, bool64ix4_all, bool64ix4_any, x86_mm256_testc_si256, x86_mm256_testz_si256; +- } +-} +- +-// 128-bit vectors: +- +-// 32 bit floats +- +-pub trait AvxF32x4 { +- fn permutevar(self, other: i32x4) -> f32x4; +-} +-impl AvxF32x4 for f32x4 { +- fn permutevar(self, other: i32x4) -> f32x4 { +- unsafe { x86_mm_permutevar_ps(self, other) } +- } +-} +- +-pub trait AvxF64x4 { +- fn sqrt(self) -> Self; +- fn addsub(self, other: Self) -> Self; +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +- fn move_mask(self) -> u32; +-} +- +-impl AvxF64x4 for f64x4 { +- #[inline] +- fn sqrt(self) -> Self { +- unsafe { x86_mm256_sqrt_pd(self) } +- } +- +- #[inline] +- fn addsub(self, other: Self) -> Self { +- unsafe { x86_mm256_addsub_pd(self, other) } +- } +- +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { x86_mm256_hadd_pd(self, other) } +- } +- +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { x86_mm256_hsub_pd(self, other) } +- } +- +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm256_max_pd(self, other) } +- } +- +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm256_min_pd(self, other) } +- } +- +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm256_movemask_pd(self) as u32 } +- } +-} +- +-pub trait AvxBool64fx4 { +- fn move_mask(self) -> u32; +-} +-impl AvxBool64fx4 for bool64fx4 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { 
x86_mm256_movemask_pd(bitcast(self)) as u32 } +- } +-} +- +-pub trait AvxF32x8 { +- fn sqrt(self) -> Self; +- fn addsub(self, other: Self) -> Self; +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +- fn move_mask(self) -> u32; +- /// Compute an approximation to the reciprocal of the square root +- /// of `self`, that is, `f32x8::splat(1.0) / self.sqrt()`. +- /// +- /// The accuracy of this approximation is platform dependent. +- fn approx_rsqrt(self) -> Self; +- /// Compute an approximation to the reciprocal of `self`, that is, +- /// `f32x8::splat(1.0) / self`. +- /// +- /// The accuracy of this approximation is platform dependent. +- fn approx_reciprocal(self) -> Self; +-} +- +-impl AvxF32x8 for f32x8 { +- #[inline] +- fn sqrt(self) -> Self { +- unsafe { x86_mm256_sqrt_ps(self) } +- } +- +- #[inline] +- fn addsub(self, other: Self) -> Self { +- unsafe { x86_mm256_addsub_ps(self, other) } +- } +- +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { x86_mm256_hadd_ps(self, other) } +- } +- +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { x86_mm256_hsub_ps(self, other) } +- } +- +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm256_max_ps(self, other) } +- } +- +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm256_min_ps(self, other) } +- } +- +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm256_movemask_ps(self) as u32 } +- } +- +- #[inline] +- fn approx_reciprocal(self) -> Self { +- unsafe { x86_mm256_rcp_ps(self) } +- } +- +- #[inline] +- fn approx_rsqrt(self) -> Self { +- unsafe { x86_mm256_rsqrt_ps(self) } +- } +-} +- +-pub trait AvxBool32fx8 { +- fn move_mask(self) -> u32; +-} +-impl AvxBool32fx8 for bool32fx8 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm256_movemask_ps(bitcast(self)) as u32 } +- } +-} +- +-pub trait AvxBool32fx4 {} +-impl AvxBool32fx4 for 
bool32fx4 {} +- +-// 64 bit floats +- +-pub trait AvxF64x2 { +- fn permutevar(self, other: i64x2) -> f64x2; +-} +-impl AvxF64x2 for f64x2 { +- fn permutevar(self, other: i64x2) -> f64x2 { +- unsafe { x86_mm_permutevar_pd(self, other) } +- } +-} +- +-pub trait AvxBool64fx2 {} +-impl AvxBool64fx2 for bool64fx2 {} +- +-// 64 bit integers +- +-pub trait AvxU64x2 {} +-impl AvxU64x2 for u64x2 {} +-pub trait AvxI64x2 {} +-impl AvxI64x2 for i64x2 {} +- +-pub trait AvxBool64ix2 {} +-impl AvxBool64ix2 for bool64ix2 {} +- +-// 32 bit integers +- +-pub trait AvxU32x4 {} +-impl AvxU32x4 for u32x4 {} +-pub trait AvxI32x4 {} +-impl AvxI32x4 for i32x4 {} +- +-pub trait AvxBool32ix4 {} +-impl AvxBool32ix4 for bool32ix4 {} +- +-// 16 bit integers +- +-pub trait AvxU16x8 {} +-impl AvxU16x8 for u16x8 {} +-pub trait AvxI16x8 {} +-impl AvxI16x8 for i16x8 {} +- +-pub trait AvxBool16ix8 {} +-impl AvxBool16ix8 for bool16ix8 {} +- +-// 8 bit integers +- +-pub trait AvxU8x16 {} +-impl AvxU8x16 for u8x16 {} +-pub trait AvxI8x16 {} +-impl AvxI8x16 for i8x16 {} +- +-pub trait AvxBool8ix16 {} +-impl AvxBool8ix16 for bool8ix16 {} +diff --git a/third_party/rust/simd/src/x86/avx2.rs b/third_party/rust/simd/src/x86/avx2.rs +deleted file mode 100644 +index e86a33d3b5bb..000000000000 +--- a/third_party/rust/simd/src/x86/avx2.rs ++++ /dev/null +@@ -1,65 +0,0 @@ +-use x86::avx::*; +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn x86_mm256_abs_epi8(x: i8x32) -> i8x32; +- fn x86_mm256_abs_epi16(x: i16x16) -> i16x16; +- fn x86_mm256_abs_epi32(x: i32x8) -> i32x8; +- fn x86_mm256_adds_epi8(x: i8x32, y: i8x32) -> i8x32; +- fn x86_mm256_adds_epu8(x: u8x32, y: u8x32) -> u8x32; +- fn x86_mm256_adds_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_adds_epu16(x: u16x16, y: u16x16) -> u16x16; +- fn x86_mm256_avg_epu8(x: u8x32, y: u8x32) -> u8x32; +- fn x86_mm256_avg_epu16(x: u16x16, y: u16x16) -> u16x16; +- fn x86_mm256_hadd_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_hadd_epi32(x: 
i32x8, y: i32x8) -> i32x8; +- fn x86_mm256_hadds_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_hsub_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_hsub_epi32(x: i32x8, y: i32x8) -> i32x8; +- fn x86_mm256_hsubs_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_madd_epi16(x: i16x16, y: i16x16) -> i32x8; +- fn x86_mm256_maddubs_epi16(x: i8x32, y: i8x32) -> i16x16; +- fn x86_mm256_max_epi8(x: i8x32, y: i8x32) -> i8x32; +- fn x86_mm256_max_epu8(x: u8x32, y: u8x32) -> u8x32; +- fn x86_mm256_max_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_max_epu16(x: u16x16, y: u16x16) -> u16x16; +- fn x86_mm256_max_epi32(x: i32x8, y: i32x8) -> i32x8; +- fn x86_mm256_max_epu32(x: u32x8, y: u32x8) -> u32x8; +- fn x86_mm256_min_epi8(x: i8x32, y: i8x32) -> i8x32; +- fn x86_mm256_min_epu8(x: u8x32, y: u8x32) -> u8x32; +- fn x86_mm256_min_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_min_epu16(x: u16x16, y: u16x16) -> u16x16; +- fn x86_mm256_min_epi32(x: i32x8, y: i32x8) -> i32x8; +- fn x86_mm256_min_epu32(x: u32x8, y: u32x8) -> u32x8; +- fn x86_mm256_mul_epi64(x: i32x8, y: i32x8) -> i64x4; +- fn x86_mm256_mul_epu64(x: u32x8, y: u32x8) -> u64x4; +- fn x86_mm256_mulhi_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_mulhi_epu16(x: u16x16, y: u16x16) -> u16x16; +- fn x86_mm256_mulhrs_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_packs_epi16(x: i16x16, y: i16x16) -> i8x32; +- fn x86_mm256_packus_epi16(x: i16x16, y: i16x16) -> u8x32; +- fn x86_mm256_packs_epi32(x: i32x8, y: i32x8) -> i16x16; +- fn x86_mm256_packus_epi32(x: i32x8, y: i32x8) -> u16x16; +- fn x86_mm256_permutevar8x32_epi32(x: i32x8, y: i32x8) -> i32x8; +- fn x86_mm256_permutevar8x32_ps(x: f32x8, y: i32x8) -> f32x8; +- fn x86_mm256_sad_epu8(x: u8x32, y: u8x32) -> u64x4; +- fn x86_mm256_shuffle_epi8(x: i8x32, y: i8x32) -> i8x32; +- fn x86_mm256_sign_epi8(x: i8x32, y: i8x32) -> i8x32; +- fn x86_mm256_sign_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_sign_epi32(x: i32x8, 
y: i32x8) -> i32x8; +- fn x86_mm256_subs_epi8(x: i8x32, y: i8x32) -> i8x32; +- fn x86_mm256_subs_epu8(x: u8x32, y: u8x32) -> u8x32; +- fn x86_mm256_subs_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_subs_epu16(x: u16x16, y: u16x16) -> u16x16; +-} +- +-// broken on rustc 1.7.0-nightly (1ddaf8bdf 2015-12-12) +-// pub trait Avx2F32x8 { +-// fn permutevar(self, other: i32x8) -> f32x8; +-// } +-// +-// impl Avx2F32x8 for f32x8 { +-// fn permutevar(self, other: i32x8) -> f32x8 { +-// unsafe { x86_mm256_permutevar8x32_ps(self, other) } +-// } +-// } +diff --git a/third_party/rust/simd/src/x86/mod.rs b/third_party/rust/simd/src/x86/mod.rs +deleted file mode 100644 +index 8763fb16ccfd..000000000000 +--- a/third_party/rust/simd/src/x86/mod.rs ++++ /dev/null +@@ -1,16 +0,0 @@ +-//! Features specific to x86 and x86-64 CPUs. +- +-#[cfg(any(feature = "doc", target_feature = "sse2"))] +-pub mod sse2; +-#[cfg(any(feature = "doc", target_feature = "sse3"))] +-pub mod sse3; +-#[cfg(any(feature = "doc", target_feature = "ssse3"))] +-pub mod ssse3; +-#[cfg(any(feature = "doc", target_feature = "sse4.1"))] +-pub mod sse4_1; +-#[cfg(any(feature = "doc", target_feature = "sse4.2"))] +-pub mod sse4_2; +-#[cfg(any(feature = "doc", target_feature = "avx"))] +-pub mod avx; +-#[cfg(any(feature = "doc", target_feature = "avx2"))] +-pub mod avx2; +diff --git a/third_party/rust/simd/src/x86/sse2.rs b/third_party/rust/simd/src/x86/sse2.rs +deleted file mode 100644 +index 5cbc853694d5..000000000000 +--- a/third_party/rust/simd/src/x86/sse2.rs ++++ /dev/null +@@ -1,359 +0,0 @@ +-use super::super::*; +-use {bitcast, simd_cast, f32x2}; +- +-pub use sixty_four::{f64x2, i64x2, u64x2, bool64ix2, bool64fx2}; +- +-//pub use super::{u64x2, i64x2, f64x2, bool64ix2, bool64fx2}; +- +-// strictly speaking, these are SSE instructions, not SSE2. 
+-extern "platform-intrinsic" { +- fn x86_mm_movemask_ps(x: f32x4) -> i32; +- fn x86_mm_max_ps(x: f32x4, y: f32x4) -> f32x4; +- fn x86_mm_min_ps(x: f32x4, y: f32x4) -> f32x4; +- fn x86_mm_rsqrt_ps(x: f32x4) -> f32x4; +- fn x86_mm_rcp_ps(x: f32x4) -> f32x4; +- fn x86_mm_sqrt_ps(x: f32x4) -> f32x4; +-} +- +-extern "platform-intrinsic" { +- fn x86_mm_adds_epi8(x: i8x16, y: i8x16) -> i8x16; +- fn x86_mm_adds_epu8(x: u8x16, y: u8x16) -> u8x16; +- fn x86_mm_adds_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_adds_epu16(x: u16x8, y: u16x8) -> u16x8; +- fn x86_mm_avg_epu8(x: u8x16, y: u8x16) -> u8x16; +- fn x86_mm_avg_epu16(x: u16x8, y: u16x8) -> u16x8; +- fn x86_mm_madd_epi16(x: i16x8, y: i16x8) -> i32x4; +- fn x86_mm_max_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_max_epu8(x: u8x16, y: u8x16) -> u8x16; +- fn x86_mm_max_pd(x: f64x2, y: f64x2) -> f64x2; +- fn x86_mm_min_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_min_epu8(x: u8x16, y: u8x16) -> u8x16; +- fn x86_mm_min_pd(x: f64x2, y: f64x2) -> f64x2; +- fn x86_mm_movemask_pd(x: f64x2) -> i32; +- fn x86_mm_movemask_epi8(x: i8x16) -> i32; +- fn x86_mm_mul_epu32(x: u32x4, y: u32x4) -> u64x2; +- fn x86_mm_mulhi_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_mulhi_epu16(x: u16x8, y: u16x8) -> u16x8; +- fn x86_mm_packs_epi16(x: i16x8, y: i16x8) -> i8x16; +- fn x86_mm_packs_epi32(x: i32x4, y: i32x4) -> i16x8; +- fn x86_mm_packus_epi16(x: i16x8, y: i16x8) -> u8x16; +- fn x86_mm_sad_epu8(x: u8x16, y: u8x16) -> u64x2; +- fn x86_mm_sqrt_pd(x: f64x2) -> f64x2; +- fn x86_mm_subs_epi8(x: i8x16, y: i8x16) -> i8x16; +- fn x86_mm_subs_epu8(x: u8x16, y: u8x16) -> u8x16; +- fn x86_mm_subs_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_subs_epu16(x: u16x8, y: u16x8) -> u16x8; +-} +- +-#[doc(hidden)] +-pub mod common { +- use super::super::super::*; +- use core::mem; +- +- #[inline] +- pub fn f32x4_sqrt(x: f32x4) -> f32x4 { +- unsafe {super::x86_mm_sqrt_ps(x)} +- } +- #[inline] +- pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 { 
+- unsafe {super::x86_mm_rsqrt_ps(x)} +- } +- #[inline] +- pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 { +- unsafe {super::x86_mm_rcp_ps(x)} +- } +- #[inline] +- pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 { +- unsafe {super::x86_mm_max_ps(x, y)} +- } +- #[inline] +- pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 { +- unsafe {super::x86_mm_min_ps(x, y)} +- } +- +- macro_rules! bools { +- ($($ty: ty, $all: ident, $any: ident, $movemask: ident, $width: expr;)*) => { +- $( +- #[inline] +- pub fn $all(x: $ty) -> bool { +- unsafe { +- super::$movemask(mem::transmute(x)) == (1 << $width) - 1 +- } +- } +- #[inline] +- pub fn $any(x: $ty) -> bool { +- unsafe { +- super::$movemask(mem::transmute(x)) != 0 +- } +- } +- )* +- } +- } +- +- bools! { +- bool32fx4, bool32fx4_all, bool32fx4_any, x86_mm_movemask_ps, 4; +- bool8ix16, bool8ix16_all, bool8ix16_any, x86_mm_movemask_epi8, 16; +- bool16ix8, bool16ix8_all, bool16ix8_any, x86_mm_movemask_epi8, 16; +- bool32ix4, bool32ix4_all, bool32ix4_any, x86_mm_movemask_epi8, 16; +- } +-} +- +-// 32 bit floats +- +-pub trait Sse2F32x4 { +- fn to_f64(self) -> f64x2; +- fn move_mask(self) -> u32; +-} +-impl Sse2F32x4 for f32x4 { +- #[inline] +- fn to_f64(self) -> f64x2 { +- unsafe { +- simd_cast(f32x2(self.0, self.1)) +- } +- } +- fn move_mask(self) -> u32 { +- unsafe {x86_mm_movemask_ps(self) as u32} +- } +-} +-pub trait Sse2Bool32fx4 { +- fn move_mask(self) -> u32; +-} +-impl Sse2Bool32fx4 for bool32fx4 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm_movemask_ps(bitcast(self)) as u32} +- } +-} +- +-// 64 bit floats +- +-pub trait Sse2F64x2 { +- fn move_mask(self) -> u32; +- fn sqrt(self) -> Self; +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +-} +-impl Sse2F64x2 for f64x2 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm_movemask_pd(bitcast(self)) as u32} +- } +- +- #[inline] +- fn sqrt(self) -> Self { +- unsafe { x86_mm_sqrt_pd(self) } +- } +- +- #[inline] +- fn 
max(self, other: Self) -> Self { +- unsafe { x86_mm_max_pd(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm_min_pd(self, other) } +- } +-} +- +-pub trait Sse2Bool64fx2 { +- fn move_mask(self) -> u32; +-} +-impl Sse2Bool64fx2 for bool64fx2 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm_movemask_pd(bitcast(self)) as u32} +- } +-} +- +-// 64 bit ints +- +-pub trait Sse2U64x2 {} +-impl Sse2U64x2 for u64x2 {} +- +-pub trait Sse2I64x2 {} +-impl Sse2I64x2 for i64x2 {} +- +-pub trait Sse2Bool64ix2 {} +-impl Sse2Bool64ix2 for bool64ix2 {} +- +-// 32 bit ints +- +-pub trait Sse2U32x4 { +- fn low_mul(self, other: Self) -> u64x2; +-} +-impl Sse2U32x4 for u32x4 { +- #[inline] +- fn low_mul(self, other: Self) -> u64x2 { +- unsafe { x86_mm_mul_epu32(self, other) } +- } +-} +- +-pub trait Sse2I32x4 { +- fn packs(self, other: Self) -> i16x8; +-} +-impl Sse2I32x4 for i32x4 { +- #[inline] +- fn packs(self, other: Self) -> i16x8 { +- unsafe { x86_mm_packs_epi32(self, other) } +- } +-} +- +-pub trait Sse2Bool32ix4 {} +-impl Sse2Bool32ix4 for bool32ix4 {} +- +-// 16 bit ints +- +-pub trait Sse2U16x8 { +- fn adds(self, other: Self) -> Self; +- fn subs(self, other: Self) -> Self; +- fn avg(self, other: Self) -> Self; +- fn mulhi(self, other: Self) -> Self; +-} +-impl Sse2U16x8 for u16x8 { +- #[inline] +- fn adds(self, other: Self) -> Self { +- unsafe { x86_mm_adds_epu16(self, other) } +- } +- #[inline] +- fn subs(self, other: Self) -> Self { +- unsafe { x86_mm_subs_epu16(self, other) } +- } +- +- #[inline] +- fn avg(self, other: Self) -> Self { +- unsafe { x86_mm_avg_epu16(self, other) } +- } +- +- #[inline] +- fn mulhi(self, other: Self) -> Self { +- unsafe { x86_mm_mulhi_epu16(self, other) } +- } +-} +- +-pub trait Sse2I16x8 { +- fn adds(self, other: Self) -> Self; +- fn subs(self, other: Self) -> Self; +- fn madd(self, other: Self) -> i32x4; +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +- fn 
mulhi(self, other: Self) -> Self; +- fn packs(self, other: Self) -> i8x16; +- fn packus(self, other: Self) -> u8x16; +-} +-impl Sse2I16x8 for i16x8 { +- #[inline] +- fn adds(self, other: Self) -> Self { +- unsafe { x86_mm_adds_epi16(self, other) } +- } +- #[inline] +- fn subs(self, other: Self) -> Self { +- unsafe { x86_mm_subs_epi16(self, other) } +- } +- +- #[inline] +- fn madd(self, other: Self) -> i32x4 { +- unsafe { x86_mm_madd_epi16(self, other) } +- } +- +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm_max_epi16(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm_min_epi16(self, other) } +- } +- +- #[inline] +- fn mulhi(self, other: Self) -> Self { +- unsafe { x86_mm_mulhi_epi16(self, other) } +- } +- +- #[inline] +- fn packs(self, other: Self) -> i8x16 { +- unsafe { x86_mm_packs_epi16(self, other) } +- } +- #[inline] +- fn packus(self, other: Self) -> u8x16 { +- unsafe { x86_mm_packus_epi16(self, other) } +- } +-} +- +-pub trait Sse2Bool16ix8 {} +-impl Sse2Bool16ix8 for bool16ix8 {} +- +-// 8 bit ints +- +-pub trait Sse2U8x16 { +- fn move_mask(self) -> u32; +- fn adds(self, other: Self) -> Self; +- fn subs(self, other: Self) -> Self; +- fn avg(self, other: Self) -> Self; +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +- fn sad(self, other: Self) -> u64x2; +-} +-impl Sse2U8x16 for u8x16 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32} +- } +- +- #[inline] +- fn adds(self, other: Self) -> Self { +- unsafe { x86_mm_adds_epu8(self, other) } +- } +- #[inline] +- fn subs(self, other: Self) -> Self { +- unsafe { x86_mm_subs_epu8(self, other) } +- } +- +- #[inline] +- fn avg(self, other: Self) -> Self { +- unsafe { x86_mm_avg_epu8(self, other) } +- } +- +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm_max_epu8(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { 
x86_mm_min_epu8(self, other) } +- } +- +- #[inline] +- fn sad(self, other: Self) -> u64x2 { +- unsafe { x86_mm_sad_epu8(self, other) } +- } +-} +- +-pub trait Sse2I8x16 { +- fn move_mask(self) -> u32; +- fn adds(self, other: Self) -> Self; +- fn subs(self, other: Self) -> Self; +-} +-impl Sse2I8x16 for i8x16 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32} +- } +- +- #[inline] +- fn adds(self, other: Self) -> Self { +- unsafe { x86_mm_adds_epi8(self, other) } +- } +- #[inline] +- fn subs(self, other: Self) -> Self { +- unsafe { x86_mm_subs_epi8(self, other) } +- } +-} +- +-pub trait Sse2Bool8ix16 { +- fn move_mask(self) -> u32; +-} +-impl Sse2Bool8ix16 for bool8ix16 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32} +- } +-} +diff --git a/third_party/rust/simd/src/x86/sse3.rs b/third_party/rust/simd/src/x86/sse3.rs +deleted file mode 100644 +index bd70b569f9c0..000000000000 +--- a/third_party/rust/simd/src/x86/sse3.rs ++++ /dev/null +@@ -1,57 +0,0 @@ +-use sixty_four::*; +-use super::super::*; +- +-extern "platform-intrinsic" { +- fn x86_mm_addsub_ps(x: f32x4, y: f32x4) -> f32x4; +- fn x86_mm_addsub_pd(x: f64x2, y: f64x2) -> f64x2; +- fn x86_mm_hadd_ps(x: f32x4, y: f32x4) -> f32x4; +- fn x86_mm_hadd_pd(x: f64x2, y: f64x2) -> f64x2; +- fn x86_mm_hsub_ps(x: f32x4, y: f32x4) -> f32x4; +- fn x86_mm_hsub_pd(x: f64x2, y: f64x2) -> f64x2; +-} +- +-pub trait Sse3F32x4 { +- fn addsub(self, other: Self) -> Self; +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +-} +- +-impl Sse3F32x4 for f32x4 { +- #[inline] +- fn addsub(self, other: Self) -> Self { +- unsafe { x86_mm_addsub_ps(self, other) } +- } +- +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { x86_mm_hadd_ps(self, other) } +- } +- +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { x86_mm_hsub_ps(self, other) } +- } +-} +- +-pub trait Sse3F64x2 { +- fn 
addsub(self, other: Self) -> Self; +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +-} +- +-impl Sse3F64x2 for f64x2 { +- #[inline] +- fn addsub(self, other: Self) -> Self { +- unsafe { x86_mm_addsub_pd(self, other) } +- } +- +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { x86_mm_hadd_pd(self, other) } +- } +- +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { x86_mm_hsub_pd(self, other) } +- } +-} +diff --git a/third_party/rust/simd/src/x86/sse4_1.rs b/third_party/rust/simd/src/x86/sse4_1.rs +deleted file mode 100644 +index fa44678a0584..000000000000 +--- a/third_party/rust/simd/src/x86/sse4_1.rs ++++ /dev/null +@@ -1,155 +0,0 @@ +-use super::super::*; +-use x86::sse2::*; +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn x86_mm_dp_ps(x: f32x4, y: f32x4, z: i32) -> f32x4; +- fn x86_mm_dp_pd(x: f64x2, y: f64x2, z: i32) -> f64x2; +- fn x86_mm_max_epi8(x: i8x16, y: i8x16) -> i8x16; +- fn x86_mm_max_epu16(x: u16x8, y: u16x8) -> u16x8; +- fn x86_mm_max_epi32(x: i32x4, y: i32x4) -> i32x4; +- fn x86_mm_max_epu32(x: u32x4, y: u32x4) -> u32x4; +- fn x86_mm_min_epi8(x: i8x16, y: i8x16) -> i8x16; +- fn x86_mm_min_epu16(x: u16x8, y: u16x8) -> u16x8; +- fn x86_mm_min_epi32(x: i32x4, y: i32x4) -> i32x4; +- fn x86_mm_min_epu32(x: u32x4, y: u32x4) -> u32x4; +- fn x86_mm_minpos_epu16(x: u16x8) -> u16x8; +- fn x86_mm_mpsadbw_epu8(x: u8x16, y: u8x16, z: i32) -> u16x8; +- fn x86_mm_mul_epi32(x: i32x4, y: i32x4) -> i64x2; +- fn x86_mm_packus_epi32(x: i32x4, y: i32x4) -> u16x8; +- fn x86_mm_testc_si128(x: u64x2, y: u64x2) -> i32; +- fn x86_mm_testnzc_si128(x: u64x2, y: u64x2) -> i32; +- fn x86_mm_testz_si128(x: u64x2, y: u64x2) -> i32; +-} +- +-// 32 bit floats +- +-pub trait Sse41F32x4 {} +-impl Sse41F32x4 for f32x4 {} +- +-// 64 bit floats +- +-pub trait Sse41F64x2 {} +-impl Sse41F64x2 for f64x2 {} +- +-// 64 bit integers +- +-pub trait Sse41U64x2 { +- fn testc(self, other: Self) -> i32; +- fn testnzc(self, 
other: Self) -> i32; +- fn testz(self, other: Self) -> i32; +-} +-impl Sse41U64x2 for u64x2 { +- #[inline] +- fn testc(self, other: Self) -> i32 { +- unsafe { x86_mm_testc_si128(self, other) } +- } +- #[inline] +- fn testnzc(self, other: Self) -> i32 { +- unsafe { x86_mm_testnzc_si128(self, other) } +- } +- #[inline] +- fn testz(self, other: Self) -> i32 { +- unsafe { x86_mm_testz_si128(self, other) } +- } +-} +-pub trait Sse41I64x2 {} +-impl Sse41I64x2 for i64x2 {} +- +-pub trait Sse41Bool64ix2 {} +-impl Sse41Bool64ix2 for bool64ix2 {} +- +-// 32 bit integers +- +-pub trait Sse41U32x4 { +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +-} +-impl Sse41U32x4 for u32x4 { +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm_max_epu32(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm_min_epu32(self, other) } +- } +-} +-pub trait Sse41I32x4 { +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +- fn low_mul(self, other: Self) -> i64x2; +- fn packus(self, other: Self) -> u16x8; +-} +-impl Sse41I32x4 for i32x4 { +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm_max_epi32(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm_min_epi32(self, other) } +- } +- +- #[inline] +- fn low_mul(self, other: Self) -> i64x2 { +- unsafe { x86_mm_mul_epi32(self, other) } +- } +- #[inline] +- fn packus(self, other: Self) -> u16x8 { +- unsafe { x86_mm_packus_epi32(self, other) } +- } +-} +- +-pub trait Sse41Bool32ix4 {} +-impl Sse41Bool32ix4 for bool32ix4 {} +- +-// 16 bit integers +- +-pub trait Sse41U16x8 { +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +- fn minpos(self) -> Self; +-} +-impl Sse41U16x8 for u16x8 { +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm_max_epu16(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { 
x86_mm_min_epu16(self, other) } +- } +- +- #[inline] +- fn minpos(self) -> Self { +- unsafe { x86_mm_minpos_epu16(self) } +- } +-} +-pub trait Sse41I16x8 {} +-impl Sse41I16x8 for i16x8 {} +- +-pub trait Sse41Bool16ix8 {} +-impl Sse41Bool16ix8 for bool16ix8 {} +- +-// 8 bit integers +- +-pub trait Sse41U8x16 {} +-impl Sse41U8x16 for u8x16 {} +-pub trait Sse41I8x16 { +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +-} +-impl Sse41I8x16 for i8x16 { +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm_max_epi8(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm_min_epi8(self, other) } +- } +-} +- +-pub trait Sse41Bool8ix16 {} +-impl Sse41Bool8ix16 for bool8ix16 {} +diff --git a/third_party/rust/simd/src/x86/sse4_2.rs b/third_party/rust/simd/src/x86/sse4_2.rs +deleted file mode 100644 +index 5afe4583cf71..000000000000 +--- a/third_party/rust/simd/src/x86/sse4_2.rs ++++ /dev/null +@@ -1,19 +0,0 @@ +-use i8x16; +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn x86_mm_cmpestra(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; +- fn x86_mm_cmpestrc(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; +- fn x86_mm_cmpestri(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; +- fn x86_mm_cmpestrm(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i8x16; +- fn x86_mm_cmpestro(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; +- fn x86_mm_cmpestrs(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; +- fn x86_mm_cmpestrz(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; +- fn x86_mm_cmpistra(x: i8x16, y: i8x16, z: i32) -> i32; +- fn x86_mm_cmpistrc(x: i8x16, y: i8x16, z: i32) -> i32; +- fn x86_mm_cmpistri(x: i8x16, y: i8x16, z: i32) -> i32; +- fn x86_mm_cmpistrm(x: i8x16, y: i8x16, z: i32) -> i8x16; +- fn x86_mm_cmpistro(x: i8x16, y: i8x16, z: i32) -> i32; +- fn x86_mm_cmpistrs(x: i8x16, y: i8x16, z: i32) -> i32; +- fn x86_mm_cmpistrz(x: i8x16, y: i8x16, z: i32) -> i32; +-} +diff 
--git a/third_party/rust/simd/src/x86/ssse3.rs b/third_party/rust/simd/src/x86/ssse3.rs +deleted file mode 100644 +index aa22a08a68a4..000000000000 +--- a/third_party/rust/simd/src/x86/ssse3.rs ++++ /dev/null +@@ -1,172 +0,0 @@ +-use super::super::*; +-use bitcast; +- +-macro_rules! bitcast { +- ($func: ident($a: ident, $b: ident)) => { +- bitcast($func(bitcast($a), bitcast($b))) +- } +-} +- +-extern "platform-intrinsic" { +- fn x86_mm_abs_epi8(x: i8x16) -> i8x16; +- fn x86_mm_abs_epi16(x: i16x8) -> i16x8; +- fn x86_mm_abs_epi32(x: i32x4) -> i32x4; +- fn x86_mm_hadd_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_hadd_epi32(x: i32x4, y: i32x4) -> i32x4; +- fn x86_mm_hadds_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_hsub_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_hsub_epi32(x: i32x4, y: i32x4) -> i32x4; +- fn x86_mm_hsubs_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_maddubs_epi16(x: u8x16, y: i8x16) -> i16x8; +- fn x86_mm_mulhrs_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_shuffle_epi8(x: i8x16, y: i8x16) -> i8x16; +- fn x86_mm_sign_epi8(x: i8x16, y: i8x16) -> i8x16; +- fn x86_mm_sign_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_sign_epi32(x: i32x4, y: i32x4) -> i32x4; +-} +- +-// 32 bit integers +- +-pub trait Ssse3I32x4 { +- fn abs(self) -> Self; +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +- fn sign(self, other: Self) -> Self; +-} +-impl Ssse3I32x4 for i32x4 { +- #[inline] +- fn abs(self) -> Self { +- unsafe { x86_mm_abs_epi32(self) } +- } +- +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { x86_mm_hadd_epi32(self, other) } +- } +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { x86_mm_hsub_epi32(self, other) } +- } +- +- #[inline] +- fn sign(self, other: Self) -> Self { +- unsafe { x86_mm_sign_epi32(self, other) } +- } +-} +- +-pub trait Ssse3U32x4 { +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +-} +-impl Ssse3U32x4 for u32x4 { +- #[inline] +- 
fn hadd(self, other: Self) -> Self { +- unsafe { bitcast!(x86_mm_hadd_epi32(self, other)) } +- } +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { bitcast!(x86_mm_hsub_epi32(self, other)) } +- } +-} +- +-// 16 bit integers +- +-pub trait Ssse3I16x8 { +- fn abs(self) -> Self; +- fn hadd(self, other: Self) -> Self; +- fn hadds(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +- fn hsubs(self, other: Self) -> Self; +- fn sign(self, other: Self) -> Self; +- fn mulhrs(self, other: Self) -> Self; +-} +-impl Ssse3I16x8 for i16x8 { +- #[inline] +- fn abs(self) -> Self { +- unsafe { x86_mm_abs_epi16(self) } +- } +- +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { x86_mm_hadd_epi16(self, other) } +- } +- #[inline] +- fn hadds(self, other: Self) -> Self { +- unsafe { x86_mm_hadds_epi16(self, other) } +- } +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { x86_mm_hsub_epi16(self, other) } +- } +- #[inline] +- fn hsubs(self, other: Self) -> Self { +- unsafe { x86_mm_hsubs_epi16(self, other) } +- } +- +- #[inline] +- fn sign(self, other: Self) -> Self { +- unsafe { x86_mm_sign_epi16(self, other) } +- } +- +- #[inline] +- fn mulhrs(self, other: Self) -> Self { +- unsafe { x86_mm_mulhrs_epi16(self, other) } +- } +-} +- +-pub trait Ssse3U16x8 { +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +-} +-impl Ssse3U16x8 for u16x8 { +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { bitcast!(x86_mm_hadd_epi16(self, other)) } +- } +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { bitcast!(x86_mm_hsub_epi16(self, other)) } +- } +-} +- +- +-// 8 bit integers +- +-pub trait Ssse3U8x16 { +- fn shuffle_bytes(self, indices: Self) -> Self; +- fn maddubs(self, other: i8x16) -> i16x8; +-} +- +-impl Ssse3U8x16 for u8x16 { +- #[inline] +- fn shuffle_bytes(self, indices: Self) -> Self { +- unsafe {bitcast!(x86_mm_shuffle_epi8(self, indices))} +- } +- +- fn maddubs(self, other: 
i8x16) -> i16x8 { +- unsafe { x86_mm_maddubs_epi16(self, other) } +- } +-} +- +-pub trait Ssse3I8x16 { +- fn abs(self) -> Self; +- fn shuffle_bytes(self, indices: Self) -> Self; +- fn sign(self, other: Self) -> Self; +-} +-impl Ssse3I8x16 for i8x16 { +- #[inline] +- fn abs(self) -> Self { +- unsafe {x86_mm_abs_epi8(self)} +- } +- #[inline] +- fn shuffle_bytes(self, indices: Self) -> Self { +- unsafe { +- x86_mm_shuffle_epi8(self, indices) +- } +- } +- +- #[inline] +- fn sign(self, other: Self) -> Self { +- unsafe { x86_mm_sign_epi8(self, other) } +- } +-} +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index 70416df2f6df..c1bf3721e423 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -716,14 +716,11 @@ set_config('MOZ_ENABLE_WEBRENDER', webrender.enable) + option('--enable-rust-simd', env='MOZ_RUST_SIMD', + help='Enable explicit SIMD in Rust code.') + +-@depends('--enable-rust-simd', target, rustc_info) +-def rust_simd(value, target, rustc_info): +- # As of 2018-06-05, the simd crate only works on aarch64, +- # armv7, x86 and x86_64. ++@depends('--enable-rust-simd', target) ++def rust_simd(value, target): ++ # As of 2019-03-04, the simd-accel feature of encoding_rs has not ++ # been properly set up outside aarch64, armv7, x86 and x86_64. + if target.cpu in ('aarch64', 'arm', 'x86', 'x86_64') and value: +- if rustc_info and rustc_info.version >= Version('1.33.0'): +- die('--enable-rust-simd does not work with Rust 1.33 or later. 
' +- 'See https://bugzilla.mozilla.org/show_bug.cgi?id=1521249 .') + return True + + set_config('MOZ_RUST_SIMD', rust_simd) +-- +2.21.0 + diff --git a/libre/iceweasel/PKGBUILD b/libre/iceweasel/PKGBUILD index 520b6aa71..7b9bc0873 100644 --- a/libre/iceweasel/PKGBUILD +++ b/libre/iceweasel/PKGBUILD @@ -39,8 +39,8 @@ pkgname=iceweasel replaces=('firefox') epoch=1 -pkgver=66.0.3 -pkgrel=2 +pkgver=67.0 +pkgrel=1 pkgrel+=.parabola1 _brandingver=66.0 _brandingrel=1 @@ -50,7 +50,8 @@ arch+=(i686 armv7h) license=(MPL GPL LGPL) url="https://wiki.parabola.nu/$pkgname" depends=(gtk3 mozilla-common libxt startup-notification mime-types dbus-glib - ffmpeg nss ttf-font libpulse sqlite icu) + ffmpeg nss ttf-font libpulse) +depends+=(sqlite icu) makedepends=(unzip zip diffutils python2-setuptools yasm mesa imake inetutils xorg-server-xvfb autoconf2.13 rust clang llvm jack gtk2 python nodejs python2-psutil cbindgen nasm) @@ -61,17 +62,17 @@ optdepends=('networkmanager: Location detection via available WiFi networks' 'speech-dispatcher: Text-to-Speech' 'hunspell-en_US: Spell checking, American English') options=(!emptydirs !makeflags !debug) -source=("https://archive.mozilla.org/pub/firefox/releases/$pkgver/source/firefox-$pkgver.source.tar.xz"{,.asc} - 0001-bz-1468911.patch +source=(https://archive.mozilla.org/pub/firefox/releases/$pkgver/source/firefox-$pkgver.source.tar.xz{,.asc} + 0001-bz-1521249.patch $pkgname.desktop https://repo.parabola.nu/other/iceweasel/${pkgname}_${_brandingver}-${_brandingrel}.branding.tar.xz{,.sig} libre-searchengines.patch libre.patch arm.patch build-arm-libopus.patch) -sha256sums=('1fe4cfe4f4afce8d5532da8f36a48c333750c90a6ee67167309357edf514c22a' +sha256sums=('2cb937db00e35162393aa5b65d1f2280be1e62d5ee33d3e9997cccba542d791b' 'SKIP' - '821f858bac2e13ce02b8c20d5387d4ecc8ab2d0e4ebe0a517cbf935da6aeb31b' + 'd0673786a6a1f1b9f6f66a3a1356afa33f1f18f59dabd92bd193c88c52a1d04c' 'e439117380218898cd5c60a8da8947847efbd0fe64cc06b129d6ca20a392cb3d' 
'ffac6b832ba2f80ac0682aa5ee88006b7033543b8be89fc17deec30b1b2c4d12' 'SKIP' @@ -87,7 +88,7 @@ prepare() { cd firefox-$pkgver # https://bugzilla.mozilla.org/show_bug.cgi?id=1521249 - patch -Np1 -i ../0001-bz-1468911.patch + patch -Np1 -i ../0001-bz-1521249.patch cat >.mozconfig <> .mozconfig <