// -rw-r--r-- 11542 libmceliece-20240726/crypto_kem/460896/avx/vec_reduce_asm.S raw
// 20240504 djb: add note.GNU-stack
// 20221231 djb: port hidden to macos; tnx thom wiggers
// 20221230 djb: add linker line
// linker define vec_reduce_asm
#include "crypto_asm_hidden.h"
#define vec_reduce_asm CRYPTO_SHARED_NAMESPACE(vec_reduce_asm)
#define _vec_reduce_asm _CRYPTO_SHARED_NAMESPACE(vec_reduce_asm)
# qhasm: int64 input_0
# qhasm: int64 input_1
# qhasm: int64 input_2
# qhasm: int64 input_3
# qhasm: int64 input_4
# qhasm: int64 input_5
# qhasm: stack64 input_6
# qhasm: stack64 input_7
# qhasm: int64 caller_r11
# qhasm: int64 caller_r12
# qhasm: int64 caller_r13
# qhasm: int64 caller_r14
# qhasm: int64 caller_r15
# qhasm: int64 caller_rbx
# qhasm: int64 caller_rbp
# qhasm: int64 t0
# qhasm: int64 t1
# qhasm: int64 c
# qhasm: int64 r
# qhasm: enter vec_reduce_asm
// ---------------------------------------------------------------------
// vec_reduce_asm: pack 13 parity bits into one integer.
//
// In:    %rdi = input_0, base address of 208 readable bytes, handled as
//        13 consecutive 16-byte entries.
// Out:   %rax bit i (0 <= i <= 12) = parity of the 128 bits stored at
//        input_0 + 16*i.  Bits 13..63 of %rax are zero.
// Clobb: %rsi and the flags.  The stack is never touched, so %rsp
//        needs no adjustment or realignment.
// Needs: a CPU with the POPCNT instruction.
//
// The body is straight-line code: no branches, and every address is a
// fixed displacement from %rdi.
// NOTE(review): the C-level prototype is not visible in this file;
// confirm the argument and return types against the caller.
// ---------------------------------------------------------------------

// One reduction step: shift the accumulator %rax left by one position
// and place the parity of the 16-byte entry at offset(%rdi) in bit 0.
.macro VEC_REDUCE_PARITY_STEP offset
        movq    \offset(%rdi), %rsi     // t0 = low quadword of the entry
        xorq    \offset+8(%rdi), %rsi   // t0 ^= high quadword; parity(t0) = parity of all 128 bits
        popcnt  %rsi, %rsi              // c = number of set bits in t0
        and     $1, %esi                // c &= 1 (32-bit write also clears bits 32..63 of %rsi)
        shl     $1, %rax                // r <<= 1, freeing bit 0
        or      %rsi, %rax              // r |= c
.endm

.p2align 5
ASM_HIDDEN _vec_reduce_asm
ASM_HIDDEN vec_reduce_asm
.global _vec_reduce_asm
.global vec_reduce_asm
_vec_reduce_asm:
vec_reduce_asm:
        xor     %eax, %eax              // r = 0 (32-bit xor clears the whole of %rax)

        // Entries are consumed from the highest offset down, so the entry
        // at offset 192 is shifted 12 more times and ends up in bit 12,
        // while the entry at offset 0 lands in bit 0.
        VEC_REDUCE_PARITY_STEP 192
        VEC_REDUCE_PARITY_STEP 176
        VEC_REDUCE_PARITY_STEP 160
        VEC_REDUCE_PARITY_STEP 144
        VEC_REDUCE_PARITY_STEP 128
        VEC_REDUCE_PARITY_STEP 112
        VEC_REDUCE_PARITY_STEP 96
        VEC_REDUCE_PARITY_STEP 80
        VEC_REDUCE_PARITY_STEP 64
        VEC_REDUCE_PARITY_STEP 48
        VEC_REDUCE_PARITY_STEP 32
        VEC_REDUCE_PARITY_STEP 16
        VEC_REDUCE_PARITY_STEP 0

        // return r (already in %rax)
        ret
.section	.note.GNU-stack,"",@progbits