Browse Source

Update go mod deps

Russ Magee 2 months ago
parent
commit
653e732445
100 changed files with 3478 additions and 2265 deletions
  1. 1 1
      Makefile
  2. 8 7
      go.mod
  3. 16 18
      go.sum
  4. 27 7
      vendor/blitter.com/go/hopscotch/hopscotch.go
  5. 0 67
      vendor/github.com/klauspost/cpuid/v2/.travis.yml
  6. 123 2
      vendor/github.com/klauspost/cpuid/v2/README.md
  7. 314 46
      vendor/github.com/klauspost/cpuid/v2/cpuid.go
  8. 2 1
      vendor/github.com/klauspost/cpuid/v2/detect_arm64.go
  9. 2 1
      vendor/github.com/klauspost/cpuid/v2/detect_ref.go
  10. 4 3
      vendor/github.com/klauspost/cpuid/v2/detect_x86.go
  11. 153 91
      vendor/github.com/klauspost/cpuid/v2/featureid_string.go
  12. 107 5
      vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
  13. 2 3
      vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
  14. 2 1
      vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
  15. 2 1
      vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
  16. 0 65
      vendor/github.com/klauspost/reedsolomon/.travis.yml
  17. 114 45
      vendor/github.com/klauspost/reedsolomon/README.md
  18. 0 20
      vendor/github.com/klauspost/reedsolomon/appveyor.yml
  19. 23 0
      vendor/github.com/klauspost/reedsolomon/galois.go
  20. 3 3
      vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go
  21. 206 0
      vendor/github.com/klauspost/reedsolomon/galois_amd64.go
  22. 30 0
      vendor/github.com/klauspost/reedsolomon/galois_arm64.go
  23. 376 1
      vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
  24. 141 233
      vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
  25. 30 19
      vendor/github.com/klauspost/reedsolomon/galois_noasm.go
  26. 28 0
      vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
  27. 53 5
      vendor/github.com/klauspost/reedsolomon/options.go
  28. 263 75
      vendor/github.com/klauspost/reedsolomon/reedsolomon.go
  29. 17 13
      vendor/github.com/klauspost/reedsolomon/streaming.go
  30. 0 3
      vendor/golang.org/x/crypto/AUTHORS
  31. 0 3
      vendor/golang.org/x/crypto/CONTRIBUTORS
  32. 4 6
      vendor/golang.org/x/crypto/argon2/argon2.go
  33. 0 33
      vendor/golang.org/x/crypto/internal/subtle/aliasing.go
  34. 0 36
      vendor/golang.org/x/crypto/internal/subtle/aliasing_purego.go
  35. 1 1
      vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go
  36. 2 2
      vendor/golang.org/x/crypto/salsa20/salsa20.go
  37. 1 1
      vendor/golang.org/x/crypto/scrypt/scrypt.go
  38. 4 8
      vendor/golang.org/x/crypto/sha3/doc.go
  39. 2 0
      vendor/golang.org/x/crypto/sha3/sha3_s390x.go
  40. 0 3
      vendor/golang.org/x/sys/AUTHORS
  41. 0 3
      vendor/golang.org/x/sys/CONTRIBUTORS
  42. 1 0
      vendor/golang.org/x/sys/cpu/byteorder.go
  43. 2 2
      vendor/golang.org/x/sys/cpu/cpu.go
  44. 6 6
      vendor/golang.org/x/sys/cpu/cpu_arm64.go
  45. 12 17
      vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c
  46. 2 2
      vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
  47. 2 2
      vendor/golang.org/x/sys/unix/endian_little.go
  48. 0 233
      vendor/golang.org/x/sys/unix/errors_freebsd_386.go
  49. 0 233
      vendor/golang.org/x/sys/unix/errors_freebsd_amd64.go
  50. 0 226
      vendor/golang.org/x/sys/unix/errors_freebsd_arm.go
  51. 0 17
      vendor/golang.org/x/sys/unix/errors_freebsd_arm64.go
  52. 1 8
      vendor/golang.org/x/sys/unix/ifreq_linux.go
  53. 17 3
      vendor/golang.org/x/sys/unix/ioctl_linux.go
  54. 20 15
      vendor/golang.org/x/sys/unix/mkall.sh
  55. 8 2
      vendor/golang.org/x/sys/unix/mkerrors.sh
  56. 0 27
      vendor/golang.org/x/sys/unix/str.go
  57. 1 8
      vendor/golang.org/x/sys/unix/syscall.go
  58. 61 6
      vendor/golang.org/x/sys/unix/syscall_aix.go
  59. 23 23
      vendor/golang.org/x/sys/unix/syscall_bsd.go
  60. 2 10
      vendor/golang.org/x/sys/unix/syscall_darwin.1_13.go
  61. 8 1
      vendor/golang.org/x/sys/unix/syscall_darwin.go
  62. 2 0
      vendor/golang.org/x/sys/unix/syscall_dragonfly.go
  63. 36 291
      vendor/golang.org/x/sys/unix/syscall_freebsd.go
  64. 3 3
      vendor/golang.org/x/sys/unix/syscall_freebsd_386.go
  65. 3 3
      vendor/golang.org/x/sys/unix/syscall_freebsd_amd64.go
  66. 2 2
      vendor/golang.org/x/sys/unix/syscall_freebsd_arm.go
  67. 2 2
      vendor/golang.org/x/sys/unix/syscall_freebsd_arm64.go
  68. 2 3
      vendor/golang.org/x/sys/unix/syscall_illumos.go
  69. 103 89
      vendor/golang.org/x/sys/unix/syscall_linux.go
  70. 0 4
      vendor/golang.org/x/sys/unix/syscall_linux_386.go
  71. 0 4
      vendor/golang.org/x/sys/unix/syscall_linux_amd64.go
  72. 0 4
      vendor/golang.org/x/sys/unix/syscall_linux_arm.go
  73. 0 4
      vendor/golang.org/x/sys/unix/syscall_linux_arm64.go
  74. 0 4
      vendor/golang.org/x/sys/unix/syscall_linux_mips64x.go
  75. 0 4
      vendor/golang.org/x/sys/unix/syscall_linux_mipsx.go
  76. 0 4
      vendor/golang.org/x/sys/unix/syscall_linux_ppc.go
  77. 0 4
      vendor/golang.org/x/sys/unix/syscall_linux_ppc64x.go
  78. 1 4
      vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go
  79. 0 4
      vendor/golang.org/x/sys/unix/syscall_linux_s390x.go
  80. 0 4
      vendor/golang.org/x/sys/unix/syscall_linux_sparc64.go
  81. 2 0
      vendor/golang.org/x/sys/unix/syscall_openbsd.go
  82. 4 0
      vendor/golang.org/x/sys/unix/syscall_openbsd_mips64.go
  83. 94 70
      vendor/golang.org/x/sys/unix/syscall_solaris.go
  84. 73 9
      vendor/golang.org/x/sys/unix/syscall_unix.go
  85. 2 11
      vendor/golang.org/x/sys/unix/sysvshm_unix.go
  86. 102 7
      vendor/golang.org/x/sys/unix/zerrors_freebsd_386.go
  87. 100 7
      vendor/golang.org/x/sys/unix/zerrors_freebsd_amd64.go
  88. 204 16
      vendor/golang.org/x/sys/unix/zerrors_freebsd_arm.go
  89. 93 7
      vendor/golang.org/x/sys/unix/zerrors_freebsd_arm64.go
  90. 382 8
      vendor/golang.org/x/sys/unix/zerrors_linux.go
  91. 4 2
      vendor/golang.org/x/sys/unix/zerrors_linux_386.go
  92. 4 2
      vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go
  93. 4 2
      vendor/golang.org/x/sys/unix/zerrors_linux_arm.go
  94. 5 2
      vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go
  95. 4 2
      vendor/golang.org/x/sys/unix/zerrors_linux_mips.go
  96. 4 2
      vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go
  97. 4 2
      vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go
  98. 4 2
      vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go
  99. 4 2
      vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go
  100. 4 2
      vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go

+ 1 - 1
Makefile

@@ -1,4 +1,4 @@
-VERSION := 0.9.5.1
+VERSION := 0.9.5.2
 .PHONY: lint vis clean common client server passwd subpkgs install uninstall reinstall
 
 ## Tag version of binaries with build info wrt.

+ 8 - 7
go.mod

@@ -1,30 +1,31 @@
 module blitter.com/go/xs
 
-go 1.17
+go 1.18
 
 require (
 	blitter.com/go/cryptmt v1.0.2
 	blitter.com/go/goutmp v1.0.6
 	blitter.com/go/herradurakex v1.0.0
-	blitter.com/go/hopscotch v0.0.0-20211113042251-b8a306eea4dc
+	blitter.com/go/hopscotch v0.0.0-20220617051533-4b42ccd4e00a
 	blitter.com/go/kyber v0.0.0-20200130200857-6f2021cb88d9
 	blitter.com/go/newhope v0.0.0-20200130200750-192fc08a8aae
 	github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da
 	github.com/creack/pty v1.1.18
 	github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f
 	github.com/kuking/go-frodokem v1.0.2
-	github.com/mattn/go-isatty v0.0.14
+	github.com/mattn/go-isatty v0.0.16
 	github.com/xtaci/kcp-go v5.4.20+incompatible
-	golang.org/x/crypto v0.0.0-20220408190544-5352b0902921
-	golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f
+	golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90
+	golang.org/x/sys v0.0.0-20220909162455-aba9fc2a8ff2
 	gopkg.in/hlandau/passlib.v1 v1.0.11
 )
 
 require (
 	blitter.com/go/chacha20 v0.0.0-20200130200441-214e4085f54c // indirect
+	blitter.com/go/groestl v0.0.0-20220410000905-c4decbf31d64 // indirect
 	blitter.com/go/mtwist v1.0.1 // indirect
-	github.com/klauspost/cpuid/v2 v2.0.6 // indirect
-	github.com/klauspost/reedsolomon v1.9.16 // indirect
+	github.com/klauspost/cpuid/v2 v2.1.1 // indirect
+	github.com/klauspost/reedsolomon v1.11.0 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/templexxx/cpufeat v0.0.0-20180724012125-cef66df7f161 // indirect
 	github.com/templexxx/xor v0.0.0-20191217153810-f85b25db303b // indirect

+ 16 - 18
go.sum

@@ -4,10 +4,12 @@ blitter.com/go/cryptmt v1.0.2 h1:ZcLhQk7onUssXyQwG3GdXDXctCVnNL+b7aFuvwOdKXc=
 blitter.com/go/cryptmt v1.0.2/go.mod h1:tdME2J3O4agaDAYIYNQzzuB28yVGnPSMmV3a/ucSU84=
 blitter.com/go/goutmp v1.0.6 h1:jRKRw2WalVBza4T50etAfbvT2xp9G5uykIHTvyB5r0k=
 blitter.com/go/goutmp v1.0.6/go.mod h1:DnK/uLBu1/1yLFiuVlmwvWErzAWVp+pDv7t6ZaQRLNc=
+blitter.com/go/groestl v0.0.0-20220410000905-c4decbf31d64 h1:SH6cZ4JiOTmWGeVd5hCgt8gsMvfPPHWpEwNdxfsBugM=
+blitter.com/go/groestl v0.0.0-20220410000905-c4decbf31d64/go.mod h1:YMdIR/gCtFwU/a09jyWAwUu2J9CQejUFwkfD+PyVg+4=
 blitter.com/go/herradurakex v1.0.0 h1:6XaxY+JLT1HUWPF0gYJnjX3pVjrw4YhYZEzZ1U0wkyc=
 blitter.com/go/herradurakex v1.0.0/go.mod h1:m3+vYZX+2dDjdo+n/HDnXEYJX9pwmNeQLgAfJM8mtxw=
-blitter.com/go/hopscotch v0.0.0-20211113042251-b8a306eea4dc h1:IS+jxdKSdlqp6TWG3yMoBde/cctBEMwMDg588JHxgTE=
-blitter.com/go/hopscotch v0.0.0-20211113042251-b8a306eea4dc/go.mod h1:9Da1oy0t9aUw3wviba+2mP1inbLGbDuCKAO3mmGQha4=
+blitter.com/go/hopscotch v0.0.0-20220617051533-4b42ccd4e00a h1:1fEN7eJMG9TweQuGMAgQlTJ0Wl7lsdDL4Nt5gHZijhY=
+blitter.com/go/hopscotch v0.0.0-20220617051533-4b42ccd4e00a/go.mod h1:LtcFd2/R9xcau5SZIYeaHvdqAM7Y5pyvdZYT5J9HAME=
 blitter.com/go/kyber v0.0.0-20200130200857-6f2021cb88d9 h1:D45AnrNphtvczBXRp5JQicZRTgaK/Is5bgPDDvRKhTc=
 blitter.com/go/kyber v0.0.0-20200130200857-6f2021cb88d9/go.mod h1:SK6QfGG72lIfKW1Td0wH7f0wwN5nSIhV3K+wvzGNjrw=
 blitter.com/go/mtwist v1.0.1 h1:PxmoWexfMpLmc8neHP/PcRc3s17ct7iz4d5W/qJVt04=
@@ -45,14 +47,14 @@ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw
 github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f h1:UWGE8Vi+1Agt0lrvnd7UsmvwqWKRzb9byK9iQmsbY0Y=
 github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f/go.mod h1:u+9Snq0w+ZdYKi8BBoaxnEwWu0fY4Kvu9ByFpM51t1s=
-github.com/klauspost/cpuid/v2 v2.0.6 h1:dQ5ueTiftKxp0gyjKSx5+8BtPWkyQbd95m8Gys/RarI=
-github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
-github.com/klauspost/reedsolomon v1.9.16 h1:mR0AwphBwqFv/I3B9AHtNKvzuowI1vrj8/3UX4XRmHA=
-github.com/klauspost/reedsolomon v1.9.16/go.mod h1:eqPAcE7xar5CIzcdfwydOEdcmchAKAP/qs14y4GCBOk=
+github.com/klauspost/cpuid/v2 v2.1.1 h1:t0wUqjowdm8ezddV5k0tLWVklVuvLJpoHeb4WBdydm0=
+github.com/klauspost/cpuid/v2 v2.1.1/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
+github.com/klauspost/reedsolomon v1.11.0 h1:fc24kMFf4I6dXJwSkVAsw8Za/dMcJrV5ImeDjG3ss1M=
+github.com/klauspost/reedsolomon v1.11.0/go.mod h1:FXLZzlJIdfqEnQLdUKWNRuMZg747hZ4oYp2Ml60Lb/k=
 github.com/kuking/go-frodokem v1.0.2 h1:sxdguENCyr6WnLbJ/cjz0AYCW75H1b+E6zXY2ldZnUU=
 github.com/kuking/go-frodokem v1.0.2/go.mod h1:83ZX1kHOd72ouCsvbffCqJIj7Ih83MQTAjH2QbqzLZk=
-github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=
-github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
+github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ=
+github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@@ -79,8 +81,8 @@ golang.org/x/crypto v0.0.0-20200128174031-69ecbb4d6d5d/go.mod h1:LzIPMQfyMNhhGPh
 golang.org/x/crypto v0.0.0-20200510223506-06a226fb4e37/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20201012173705-84dcc777aaee/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/crypto v0.0.0-20220408190544-5352b0902921 h1:iU7T1X1J6yxDr0rda54sWGkHgOp5XJrqm79gcNlC2VM=
-golang.org/x/crypto v0.0.0-20220408190544-5352b0902921/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
+golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90 h1:Y/gsMcFOcR+6S6f3YeMKl5g+dZMEWqcz5Czj/GWYbkM=
+golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
 golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
@@ -102,16 +104,12 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190902133755-9109b7679e13/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f h1:8w7RhxzTVgUzw/AH/9mUV5q0vMgy40SQRursCcfmkCw=
-golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220909162455-aba9fc2a8ff2 h1:wM1k/lXfpc5HdkJJyW9GELpd8ERGdnh8sMGL6Gzq3Ho=
+golang.org/x/sys v0.0.0-20220909162455-aba9fc2a8ff2/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=

+ 27 - 7
vendor/blitter.com/go/hopscotch/hopscotch.go

@@ -25,10 +25,11 @@ import (
 	_ "crypto/sha512"
 
 	b2b "golang.org/x/crypto/blake2b"
+	groestl "blitter.com/go/groestl"
 )
 
 const (
-	maxResched = 10 // above 20 starts to show outlines in 'tuxtest' ... so 10 max
+	maxResched = 99 // above 20 starts to show outlines in 'tuxtest' ... so 10 max
 )
 
 type Cipher struct {
@@ -71,9 +72,10 @@ func New(r io.Reader, w io.Writer, resched int, key []byte) (c *Cipher) {
 	}
 
 	// Init all the hash algs we're going to 'hop' around with initial keystream
-	c.h = make([]hash.Hash, 2)
+	c.h = make([]hash.Hash, 3)
 	c.h[0] = sha512.New()
 	c.h[1], _ = b2b.New512(c.k)
+	c.h[2] = groestl.New512()
 	c.keyUpdate(c.k)
 
 	c.rekeyCtr = len(c.hs) * c.resched // lower multiplier == greater security, lower speed
@@ -109,6 +111,11 @@ func (c *Cipher) keyUpdate(data []byte) {
 		sliceTmp := b2b.Sum512(data)
 		c.hs = append(c.hs, sliceTmp[:]...)
 	}
+	{
+		c.h[2].Write(data)
+		sliceTmp := groestl.Sum512(data)
+		c.hs = append(c.hs, sliceTmp[:]...)
+	}
 }
 
 func (c *Cipher) yield(ib byte) (ob byte) {
@@ -118,25 +125,38 @@ func (c *Cipher) yield(ib byte) (ob byte) {
 	//fmt.Fprintf(os.Stderr, "[c.hidx:%v c.idx:%v]\n", c.hidx, c.idx)
 
 	// NOTE: using a non-prime modulus degrades CV % from ~ 0.055 to ~ 0.07
-	switch c.ctr % 3 {
+	switch c.ctr % 5 {
 	case 0:
 		ob = c.bTmp ^ ib ^ byte(c.ctr) ^ byte(c.idx) ^
 			c.hs[len(c.hs)-19] ^ c.hs[len(c.hs)-2] ^ c.hs[len(c.hs)-3] ^ c.hs[len(c.hs)-5] ^
-			c.hs[len(c.hs)-7] ^ c.hs[len(c.hs)-11] ^ c.hs[len(c.hs)-13] ^ c.hs[len(c.hs)-17]
+			c.hs[len(c.hs)-7] ^ c.hs[len(c.hs)-11] ^ c.hs[len(c.hs)-13] ^ c.hs[len(c.hs)-17] ^
+			c.hs[len(c.hs)-47] ^ c.hs[len(c.hs)-43] ^ c.hs[len(c.hs)-41] ^ c.hs[len(c.hs)-39]
 
 	case 1:
 		ob = c.bTmp ^ ib ^ byte(c.ctr) ^ byte(c.idx) ^
 			c.hs[len(c.hs)-5] ^ c.hs[len(c.hs)-7] ^ c.hs[len(c.hs)-11] ^ c.hs[len(c.hs)-13] ^
-			c.hs[len(c.hs)-17] ^ c.hs[len(c.hs)-19] ^ c.hs[len(c.hs)-23] ^ c.hs[len(c.hs)-29]
+			c.hs[len(c.hs)-17] ^ c.hs[len(c.hs)-19] ^ c.hs[len(c.hs)-23] ^ c.hs[len(c.hs)-29] ^
+			c.hs[len(c.hs)-43] ^ c.hs[len(c.hs)-41] ^ c.hs[len(c.hs)-39] ^ c.hs[len(c.hs)-37]
 
 	case 2:
 		ob = c.bTmp ^ ib ^ byte(c.ctr) ^ byte(c.idx) ^
 			c.hs[len(c.hs)-13] ^ c.hs[len(c.hs)-17] ^ c.hs[len(c.hs)-23] ^ c.hs[len(c.hs)-27] ^
-			c.hs[len(c.hs)-29] ^ c.hs[len(c.hs)-31] ^ c.hs[len(c.hs)-2] ^ c.hs[len(c.hs)-3]
+			c.hs[len(c.hs)-29] ^ c.hs[len(c.hs)-31] ^ c.hs[len(c.hs)-2] ^ c.hs[len(c.hs)-3] ^
+			c.hs[len(c.hs)-37] ^ c.hs[len(c.hs)-41] ^ c.hs[len(c.hs)-39] ^ c.hs[len(c.hs)-47]
+	case 3:
+		ob = c.bTmp ^ ib ^ byte(c.ctr) ^ byte(c.idx) ^
+			c.hs[len(c.hs)-13] ^ c.hs[len(c.hs)-17] ^ c.hs[len(c.hs)-23] ^ c.hs[len(c.hs)-27] ^
+			c.hs[len(c.hs)-29] ^ c.hs[len(c.hs)-31] ^ c.hs[len(c.hs)-5] ^ c.hs[len(c.hs)-3] ^
+			c.hs[len(c.hs)-43] ^ c.hs[len(c.hs)-41] ^ c.hs[len(c.hs)-39] ^ c.hs[len(c.hs)-37]
+	case 4:
+		ob = c.bTmp ^ ib ^ byte(c.ctr) ^ byte(c.idx) ^
+			c.hs[len(c.hs)-13] ^ c.hs[len(c.hs)-17] ^ c.hs[len(c.hs)-23] ^ c.hs[len(c.hs)-27] ^
+			c.hs[len(c.hs)-29] ^ c.hs[len(c.hs)-31] ^ c.hs[len(c.hs)-7] ^ c.hs[len(c.hs)-3] ^
+			c.hs[len(c.hs)-33] ^ c.hs[len(c.hs)-41] ^ c.hs[len(c.hs)-45] ^ c.hs[len(c.hs)-43]
 	}
 
 	if c.ctr%c.rekeyCtr == 0 {
-		bufTmp := make([]byte, 32)
+		bufTmp := make([]byte, 16*3)
 		_, _ = c.prng.Read(bufTmp)
 		c.keyUpdate(bufTmp)
 	}

+ 0 - 67
vendor/github.com/klauspost/cpuid/v2/.travis.yml

@@ -1,67 +0,0 @@
-language: go
-
-os:
-  - linux
-  - osx
-  - windows
-
-arch:
-  - amd64
-  - arm64
-
-go:
-  - 1.13.x
-  - 1.14.x
-  - 1.15.x
-  - 1.16.x
-  - master
-
-env:
-  - CGO_ENABLED=0
-
-script:
-  - go vet ./...
-  - go test -test.v -test.run ^TestCPUID$
-  - CGO_ENABLED=1 go test -race ./...
-  - go test -tags=nounsafe -test.v -test.run ^TestCPUID$
-  - go test -tags=noasm ./...
-  - go run ./cmd/cpuid/main.go
-  - go run ./cmd/cpuid/main.go -json
-
-matrix:
-  allow_failures:
-    - go: 'master'
-  fast_finish: true
-  include:
-    - stage: other
-      go: 1.16.x
-      os: linux
-      arch: amd64
-      script:
-        - diff <(gofmt -d .) <(printf "")
-        - diff <(gofmt -d ./private) <(printf "")
-        - curl -sfL https://git.io/goreleaser | VERSION=v0.157.0 sh -s -- check # check goreleaser config for deprecations
-        - curl -sL https://git.io/goreleaser | VERSION=v0.157.0 sh -s -- --snapshot --skip-publish --rm-dist
-        - go get github.com/klauspost/asmfmt&&go install github.com/klauspost/asmfmt/cmd/asmfmt
-        - diff <(asmfmt -d .) <(printf "")
-        - GOOS=linux GOARCH=386 go test .
-        - ./test-architectures.sh
-    - stage: other
-      go: 1.15.x
-      os: linux
-      arch: amd64
-      script:
-        - ./test-architectures.sh
-
-deploy:
-  - provider: script
-    skip_cleanup: true
-    script: curl -sL https://git.io/goreleaser | VERSION=v0.157.0 bash || true
-    on:
-      tags: true
-      condition: ($TRAVIS_OS_NAME = linux) && ($TRAVIS_CPU_ARCH = amd64)
-      go: 1.16.x
-branches:
-  only:
-    - master
-    - /^v\d+\.\d+(\.\d+)?(-\S*)?$/

+ 123 - 2
vendor/github.com/klauspost/cpuid/v2/README.md

@@ -39,10 +39,10 @@ func main() {
 	fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore)
 	fmt.Println("LogicalCores:", CPU.LogicalCores)
 	fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID)
-	fmt.Println("Features:", fmt.Sprintf(strings.Join(CPU.FeatureSet(), ",")))
+	fmt.Println("Features:", strings.Join(CPU.FeatureSet(), ","))
 	fmt.Println("Cacheline bytes:", CPU.CacheLine)
 	fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes")
-	fmt.Println("L1 Instruction Cache:", CPU.Cache.L1D, "bytes")
+	fmt.Println("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
 	fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes")
 	fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes")
 	fmt.Println("Frequency", CPU.Hz, "hz")
@@ -132,6 +132,127 @@ func main() {
 }
 ```
 
+## commandline
+
+Download as binary from: https://github.com/klauspost/cpuid/releases
+
+Install from source:
+
+`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
+
+### Example
+
+```
+λ cpuid
+Name: AMD Ryzen 9 3950X 16-Core Processor
+Vendor String: AuthenticAMD
+Vendor ID: AMD
+PhysicalCores: 16
+Threads Per Core: 2
+Logical Cores: 32
+CPU Family 23 Model: 113
+Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CLZERO,CMOV,CMPXCHG8,CPBOOST,CX16,F16C,FMA3,FXSR,FXSROPT,HTT,HYPERVISOR,LAHF,LZCNT,MCAOVERFLOW,MMX,MMXEXT,MOVBE,NX,OSXSAVE,POPCNT,RDRAND,RDSEED,RDTSCP,SCE,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3,SUCCOR,X87,XSAVE
+Microarchitecture level: 3
+Cacheline bytes: 64
+L1 Instruction Cache: 32768 bytes
+L1 Data Cache: 32768 bytes
+L2 Cache: 524288 bytes
+L3 Cache: 16777216 bytes
+
+```
+### JSON Output:
+
+```
+λ cpuid --json
+{
+  "BrandName": "AMD Ryzen 9 3950X 16-Core Processor",
+  "VendorID": 2,
+  "VendorString": "AuthenticAMD",
+  "PhysicalCores": 16,
+  "ThreadsPerCore": 2,
+  "LogicalCores": 32,
+  "Family": 23,
+  "Model": 113,
+  "CacheLine": 64,
+  "Hz": 0,
+  "BoostFreq": 0,
+  "Cache": {
+    "L1I": 32768,
+    "L1D": 32768,
+    "L2": 524288,
+    "L3": 16777216
+  },
+  "SGX": {
+    "Available": false,
+    "LaunchControl": false,
+    "SGX1Supported": false,
+    "SGX2Supported": false,
+    "MaxEnclaveSizeNot64": 0,
+    "MaxEnclaveSize64": 0,
+    "EPCSections": null
+  },
+  "Features": [
+    "ADX",
+    "AESNI",
+    "AVX",
+    "AVX2",
+    "BMI1",
+    "BMI2",
+    "CLMUL",
+    "CLZERO",
+    "CMOV",
+    "CMPXCHG8",
+    "CPBOOST",
+    "CX16",
+    "F16C",
+    "FMA3",
+    "FXSR",
+    "FXSROPT",
+    "HTT",
+    "HYPERVISOR",
+    "LAHF",
+    "LZCNT",
+    "MCAOVERFLOW",
+    "MMX",
+    "MMXEXT",
+    "MOVBE",
+    "NX",
+    "OSXSAVE",
+    "POPCNT",
+    "RDRAND",
+    "RDSEED",
+    "RDTSCP",
+    "SCE",
+    "SHA",
+    "SSE",
+    "SSE2",
+    "SSE3",
+    "SSE4",
+    "SSE42",
+    "SSE4A",
+    "SSSE3",
+    "SUCCOR",
+    "X87",
+    "XSAVE"
+  ],
+  "X64Level": 3
+}
+```
+
+### Check CPU microarch level
+
+```
+λ cpuid --check-level=3
+2022/03/18 17:04:40 AMD Ryzen 9 3950X 16-Core Processor
+2022/03/18 17:04:40 Microarchitecture level 3 is supported. Max level is 3.
+Exit Code 0
+
+λ cpuid --check-level=4
+2022/03/18 17:06:18 AMD Ryzen 9 3950X 16-Core Processor
+2022/03/18 17:06:18 Microarchitecture level 4 not supported. Max level is 3.
+Exit Code 1
+```
+
 # license
 
 This code is published under an MIT license. See LICENSE file for more information.

+ 314 - 46
vendor/github.com/klauspost/cpuid/v2/cpuid.go

@@ -14,6 +14,7 @@ import (
 	"flag"
 	"fmt"
 	"math"
+	"math/bits"
 	"os"
 	"runtime"
 	"strings"
@@ -83,6 +84,7 @@ const (
 	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
 	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
 	AVX512F                             // AVX-512 Foundation
+	AVX512FP16                          // AVX-512 FP16 Instructions
 	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
 	AVX512PF                            // AVX-512 Prefetch Instructions
 	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
@@ -91,21 +93,32 @@ const (
 	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
 	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
 	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
-	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one.
+	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one
+	AVXVNNI                             // AVX (VEX encoded) VNNI neural network instructions
 	BMI1                                // Bit Manipulation Instruction Set 1
 	BMI2                                // Bit Manipulation Instruction Set 2
+	CETIBT                              // Intel CET Indirect Branch Tracking
+	CETSS                               // Intel CET Shadow Stack
 	CLDEMOTE                            // Cache Line Demote
 	CLMUL                               // Carry-less Multiplication
+	CLZERO                              // CLZERO instruction supported
 	CMOV                                // i686 CMOV
+	CMPSB_SCADBS_SHORT                  // Fast short CMPSB and SCASB
+	CMPXCHG8                            // CMPXCHG8 instruction
+	CPBOOST                             // Core Performance Boost
 	CX16                                // CMPXCHG16B Instruction
 	ENQCMD                              // Enqueue Command
 	ERMS                                // Enhanced REP MOVSB/STOSB
 	F16C                                // Half-precision floating-point conversion
 	FMA3                                // Intel FMA 3. Does not imply AVX.
 	FMA4                                // Bulldozer FMA4 functions
-	GFNI                                // Galois Field New Instructions
+	FXSR                                // FXSAVE, FXRESTOR instructions, CR4 bit 9
+	FXSROPT                             // FXSAVE/FXRSTOR optimizations
+	GFNI                                // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
 	HLE                                 // Hardware Lock Elision
+	HRESET                              // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
 	HTT                                 // Hyperthreading (enabled)
+	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
 	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
 	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
 	IBS                                 // Instruction Based Sampling (AMD)
@@ -117,22 +130,48 @@ const (
 	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
 	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
 	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
+	IBS_PREVENTHOST                     // Disallowing IBS use by the host supported
+	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
+	INVLPGB                             // NVLPGB and TLBSYNC instruction supported
+	LAHF                                // LAHF/SAHF in long mode
+	LAM                                 // If set, CPU supports Linear Address Masking
+	LBRVIRT                             // LBR virtualization
 	LZCNT                               // LZCNT instruction
+	MCAOVERFLOW                         // MCA overflow recovery support.
+	MCOMMIT                             // MCOMMIT instruction supported
 	MMX                                 // standard MMX
 	MMXEXT                              // SSE integer functions or AMD MMX ext
+	MOVBE                               // MOVBE instruction (big-endian)
 	MOVDIR64B                           // Move 64 Bytes as Direct Store
 	MOVDIRI                             // Move Doubleword as Direct Store
+	MOVSB_ZL                            // Fast Zero-Length MOVSB
 	MPX                                 // Intel MPX (Memory Protection Extensions)
+	MSRIRC                              // Instruction Retired Counter MSR available
+	MSR_PAGEFLUSH                       // Page Flush MSR available
+	NRIPS                               // Indicates support for NRIP save on VMEXIT
 	NX                                  // NX (No-Execute) bit
+	OSXSAVE                             // XSAVE enabled by OS
+	PCONFIG                             // PCONFIG for Intel Multi-Key Total Memory Encryption
 	POPCNT                              // POPCNT instruction
+	RDPRU                               // RDPRU instruction supported
 	RDRAND                              // RDRAND instruction is available
 	RDSEED                              // RDSEED instruction is available
 	RDTSCP                              // RDTSCP Instruction
 	RTM                                 // Restricted Transactional Memory
+	RTM_ALWAYS_ABORT                    // Indicates that the loaded microcode is forcing RTM abort.
 	SERIALIZE                           // Serialize Instruction Execution
+	SEV                                 // AMD Secure Encrypted Virtualization supported
+	SEV_64BIT                           // AMD SEV guest execution only allowed from a 64-bit host
+	SEV_ALTERNATIVE                     // AMD SEV Alternate Injection supported
+	SEV_DEBUGSWAP                       // Full debug state swap supported for SEV-ES guests
+	SEV_ES                              // AMD SEV Encrypted State supported
+	SEV_RESTRICTED                      // AMD SEV Restricted Injection supported
+	SEV_SNP                             // AMD SEV Secure Nested Paging supported
 	SGX                                 // Software Guard Extensions
 	SGXLC                               // Software Guard Extensions Launch Control
 	SHA                                 // Intel SHA Extensions
+	SME                                 // AMD Secure Memory Encryption supported
+	SME_COHERENT                        // AMD Hardware cache coherency across encryption domains enforced
 	SSE                                 // SSE functions
 	SSE2                                // P4 SSE functions
 	SSE3                                // Prescott SSE3 functions
@@ -141,14 +180,38 @@ const (
 	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
 	SSSE3                               // Conroe SSSE3 functions
 	STIBP                               // Single Thread Indirect Branch Predictors
+	STOSB_SHORT                         // Fast short STOSB
+	SUCCOR                              // Software uncorrectable error containment and recovery capability.
+	SVM                                 // AMD Secure Virtual Machine
+	SVMDA                               // Indicates support for the SVM decode assists.
+	SVMFBASID                           // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
+	SVML                                // AMD SVM lock. Indicates support for SVM-Lock.
+	SVMNP                               // AMD SVM nested paging
+	SVMPF                               // SVM pause intercept filter. Indicates support for the pause intercept filter
+	SVMPFT                              // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
+	SYSCALL                             // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
+	SYSEE                               // SYSENTER and SYSEXIT instructions
 	TBM                                 // AMD Trailing Bit Manipulation
+	TOPEXT                              // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
+	TME                                 // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
+	TSCRATEMSR                          // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
 	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
-	VAES                                // Vector AES
+	VAES                                // Vector AES. AVX(512) versions requires additional checks.
+	VMCBCLEAN                           // VMCB clean bits. Indicates support for VMCB clean bits.
+	VMPL                                // AMD VM Permission Levels supported
+	VMSA_REGPROT                        // AMD VMSA Register Protection supported
 	VMX                                 // Virtual Machine Extensions
-	VPCLMULQDQ                          // Carry-Less Multiplication Quadword
+	VPCLMULQDQ                          // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
+	VTE                                 // AMD Virtual Transparent Encryption supported
 	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
 	WBNOINVD                            // Write Back and Do Not Invalidate Cache
+	X87                                 // FPU
+	XGETBV1                             // Supports XGETBV with ECX = 1
 	XOP                                 // Bulldozer XOP functions
+	XSAVE                               // XSAVE, XRESTOR, XSETBV, XGETBV
+	XSAVEC                              // Supports XSAVEC and the compacted form of XRSTOR.
+	XSAVEOPT                            // XSAVEOPT available
+	XSAVES                              // Supports XSAVES/XRSTORS and IA32_XSS
 
 	// ARM features:
 	AESARM   // AES instructions
@@ -175,7 +238,6 @@ const (
 	SM3      // SM3 instructions
 	SM4      // SM4 instructions
 	SVE      // Scalable Vector Extension
-
 	// Keep it last. It automatically defines the size of []flagSet
 	lastID
 
@@ -193,8 +255,10 @@ type CPUInfo struct {
 	LogicalCores   int     // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
 	Family         int     // CPU family number
 	Model          int     // CPU model number
+	Stepping       int     // CPU stepping info
 	CacheLine      int     // Cache line size in bytes. Will be 0 if undetectable.
-	Hz             int64   // Clock speed, if known, 0 otherwise
+	Hz             int64   // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
+	BoostFreq      int64   // Max clock speed, if known, 0 otherwise
 	Cache          struct {
 		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
 		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
@@ -298,6 +362,41 @@ func (c CPUInfo) Has(id FeatureID) bool {
 	return c.featureSet.inSet(id)
 }
 
+// AnyOf returns whether the CPU supports one or more of the requested features.
+func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
+	for _, id := range ids {
+		if c.featureSet.inSet(id) {
+			return true
+		}
+	}
+	return false
+}
+
+// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
+var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)
+var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
+var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
+var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
+
+// X64Level returns the microarchitecture level detected on the CPU.
+// If features are lacking or non x64 mode, 0 is returned.
+// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
+func (c CPUInfo) X64Level() int {
+	if c.featureSet.hasSet(level4Features) {
+		return 4
+	}
+	if c.featureSet.hasSet(level3Features) {
+		return 3
+	}
+	if c.featureSet.hasSet(level2Features) {
+		return 2
+	}
+	if c.featureSet.hasSet(level1Features) {
+		return 1
+	}
+	return 0
+}
+
 // Disable will disable one or several features.
 func (c *CPUInfo) Disable(ids ...FeatureID) bool {
 	for _, id := range ids {
@@ -320,11 +419,10 @@ func (c CPUInfo) IsVendor(v Vendor) bool {
 	return c.VendorID == v
 }
 
+// FeatureSet returns all available features as strings.
 func (c CPUInfo) FeatureSet() []string {
-	s := make([]string, 0)
-	for _, f := range c.featureSet.Strings() {
-		s = append(s, f)
-	}
+	s := make([]string, 0, c.featureSet.nEnabled())
+	s = append(s, c.featureSet.Strings()...)
 	return s
 }
 
@@ -363,25 +461,42 @@ func (c CPUInfo) LogicalCPU() int {
 	return int(ebx >> 24)
 }
 
-// hertz tries to compute the clock speed of the CPU. If leaf 15 is
+// frequencies tries to compute the clock speed of the CPU. If leaf 15 is
 // supported, use it, otherwise parse the brand string. Yes, really.
-func hertz(model string) int64 {
+func (c *CPUInfo) frequencies() {
+	c.Hz, c.BoostFreq = 0, 0
 	mfi := maxFunctionID()
 	if mfi >= 0x15 {
 		eax, ebx, ecx, _ := cpuid(0x15)
 		if eax != 0 && ebx != 0 && ecx != 0 {
-			return int64((int64(ecx) * int64(ebx)) / int64(eax))
+			c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
+		}
+	}
+	if mfi >= 0x16 {
+		a, b, _, _ := cpuid(0x16)
+		// Base...
+		if a&0xffff > 0 {
+			c.Hz = int64(a&0xffff) * 1_000_000
+		}
+		// Boost...
+		if b&0xffff > 0 {
+			c.BoostFreq = int64(b&0xffff) * 1_000_000
 		}
 	}
+	if c.Hz > 0 {
+		return
+	}
+
 	// computeHz determines the official rated speed of a CPU from its brand
 	// string. This insanity is *actually the official documented way to do
 	// this according to Intel*, prior to leaf 0x15 existing. The official
 	// documentation only shows this working for exactly `x.xx` or `xxxx`
 	// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
 	// sizes.
+	model := c.BrandName
 	hz := strings.LastIndex(model, "Hz")
 	if hz < 3 {
-		return 0
+		return
 	}
 	var multiplier int64
 	switch model[hz-1] {
@@ -393,7 +508,7 @@ func hertz(model string) int64 {
 		multiplier = 1000 * 1000 * 1000 * 1000
 	}
 	if multiplier == 0 {
-		return 0
+		return
 	}
 	freq := int64(0)
 	divisor := int64(0)
@@ -405,21 +520,22 @@ func hertz(model string) int64 {
 			decimalShift *= 10
 		} else if model[i] == '.' {
 			if divisor != 0 {
-				return 0
+				return
 			}
 			divisor = decimalShift
 		} else {
-			return 0
+			return
 		}
 	}
 	// we didn't find a space
 	if i < 0 {
-		return 0
+		return
 	}
 	if divisor != 0 {
-		return (freq * multiplier) / divisor
+		c.Hz = (freq * multiplier) / divisor
+		return
 	}
-	return freq * multiplier
+	c.Hz = freq * multiplier
 }
 
 // VM Will return true if the cpu id indicates we are in
@@ -468,6 +584,32 @@ func (s *flagSet) or(other flagSet) {
 	}
 }
 
+// hasSet returns whether all features are present.
+func (s flagSet) hasSet(other flagSet) bool {
+	for i, v := range other[:] {
+		if s[i]&v != v {
+			return false
+		}
+	}
+	return true
+}
+
+// nEnabled will return the number of enabled flags.
+func (s flagSet) nEnabled() (n int) {
+	for _, v := range s[:] {
+		n += bits.OnesCount64(uint64(v))
+	}
+	return n
+}
+
+func flagSetWith(feat ...FeatureID) flagSet {
+	var res flagSet
+	for _, f := range feat {
+		res.set(f)
+	}
+	return res
+}
+
 // ParseFeature will parse the string and return the ID of the matching feature.
 // Will return UNKNOWN if not found.
 func ParseFeature(s string) FeatureID {
@@ -548,7 +690,7 @@ func threadsPerCore() int {
 		if vend == AMD {
 			// Workaround for AMD returning 0, assume 2 if >= Zen 2
 			// It will be more correct than not.
-			fam, _ := familyModel()
+			fam, _, _ := familyModel()
 			_, _, _, d := cpuid(1)
 			if (d&(1<<28)) != 0 && fam >= 23 {
 				return 2
@@ -586,14 +728,27 @@ func logicalCores() int {
 	}
 }
 
-func familyModel() (int, int) {
+func familyModel() (family, model, stepping int) {
 	if maxFunctionID() < 0x1 {
-		return 0, 0
+		return 0, 0, 0
 	}
 	eax, _, _, _ := cpuid(1)
-	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
-	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
-	return int(family), int(model)
+	// If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
+	family = int((eax >> 8) & 0xf)
+	extFam := family == 0x6 // Intel is 0x6, needs extended model.
+	if family == 0xf {
+		// Add ExtFamily
+		family += int((eax >> 20) & 0xff)
+		extFam = true
+	}
+	// If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
+	model = int((eax >> 4) & 0xf)
+	if extFam {
+		// Add ExtModel
+		model += int((eax >> 12) & 0xf0)
+	}
+	stepping = int(eax & 0xf)
+	return family, model, stepping
 }
 
 func physicalCores() int {
@@ -677,6 +832,7 @@ func (c *CPUInfo) cacheSize() {
 		if maxFunctionID() < 4 {
 			return
 		}
+		c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
 		for i := uint32(0); ; i++ {
 			eax, ebx, ecx, _ := cpuidex(4, i)
 			cacheType := eax & 15
@@ -727,9 +883,14 @@ func (c *CPUInfo) cacheSize() {
 		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
 
 		// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
-		if maxExtendedFunction() < 0x8000001D {
+		if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) {
 			return
 		}
+
+		// Xen Hypervisor is buggy and returns the same entry no matter ECX value.
+		// Hack: When we encounter the same entry 100 times we break.
+		nSame := 0
+		var last uint32
 		for i := uint32(0); i < math.MaxUint32; i++ {
 			eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
 
@@ -745,6 +906,16 @@ func (c *CPUInfo) cacheSize() {
 				return
 			}
 
+			// Check for the same value repeated.
+			comb := eax ^ ebx ^ ecx
+			if comb == last {
+				nSame++
+				if nSame == 100 {
+					return
+				}
+			}
+			last = comb
+
 			switch level {
 			case 1:
 				switch typ {
@@ -769,8 +940,6 @@ func (c *CPUInfo) cacheSize() {
 			}
 		}
 	}
-
-	return
 }
 
 type SGXEPCSection struct {
@@ -831,21 +1000,26 @@ func support() flagSet {
 	if mfi < 0x1 {
 		return fs
 	}
-	family, model := familyModel()
+	family, model, _ := familyModel()
 
 	_, _, c, d := cpuid(1)
+	fs.setIf((d&(1<<0)) != 0, X87)
+	fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
+	fs.setIf((d&(1<<11)) != 0, SYSEE)
 	fs.setIf((d&(1<<15)) != 0, CMOV)
 	fs.setIf((d&(1<<23)) != 0, MMX)
-	fs.setIf((d&(1<<25)) != 0, MMXEXT)
+	fs.setIf((d&(1<<24)) != 0, FXSR)
+	fs.setIf((d&(1<<25)) != 0, FXSROPT)
 	fs.setIf((d&(1<<25)) != 0, SSE)
 	fs.setIf((d&(1<<26)) != 0, SSE2)
 	fs.setIf((c&1) != 0, SSE3)
 	fs.setIf((c&(1<<5)) != 0, VMX)
-	fs.setIf((c&0x00000200) != 0, SSSE3)
-	fs.setIf((c&0x00080000) != 0, SSE4)
-	fs.setIf((c&0x00100000) != 0, SSE42)
+	fs.setIf((c&(1<<9)) != 0, SSSE3)
+	fs.setIf((c&(1<<19)) != 0, SSE4)
+	fs.setIf((c&(1<<20)) != 0, SSE42)
 	fs.setIf((c&(1<<25)) != 0, AESNI)
 	fs.setIf((c&(1<<1)) != 0, CLMUL)
+	fs.setIf(c&(1<<22) != 0, MOVBE)
 	fs.setIf(c&(1<<23) != 0, POPCNT)
 	fs.setIf(c&(1<<30) != 0, RDRAND)
 
@@ -861,6 +1035,8 @@ func support() flagSet {
 	if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
 		fs.setIf(threadsPerCore() > 1, HTT)
 	}
+	fs.setIf(c&1<<26 != 0, XSAVE)
+	fs.setIf(c&1<<27 != 0, OSXSAVE)
 	// Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
 	const avxCheck = 1<<26 | 1<<27 | 1<<28
 	if c&avxCheck == avxCheck {
@@ -886,7 +1062,6 @@ func support() flagSet {
 	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
 	if mfi >= 7 {
 		_, ebx, ecx, edx := cpuidex(7, 0)
-		eax1, _, _, _ := cpuidex(7, 1)
 		if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
 			fs.set(AVX2)
 		}
@@ -903,19 +1078,38 @@ func support() flagSet {
 		fs.setIf(ebx&(1<<18) != 0, RDSEED)
 		fs.setIf(ebx&(1<<19) != 0, ADX)
 		fs.setIf(ebx&(1<<29) != 0, SHA)
+
 		// CPUID.(EAX=7, ECX=0).ECX
 		fs.setIf(ecx&(1<<5) != 0, WAITPKG)
+		fs.setIf(ecx&(1<<7) != 0, CETSS)
+		fs.setIf(ecx&(1<<8) != 0, GFNI)
+		fs.setIf(ecx&(1<<9) != 0, VAES)
+		fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
+		fs.setIf(ecx&(1<<13) != 0, TME)
 		fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
 		fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
 		fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
 		fs.setIf(ecx&(1<<29) != 0, ENQCMD)
 		fs.setIf(ecx&(1<<30) != 0, SGXLC)
+
 		// CPUID.(EAX=7, ECX=0).EDX
+		fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
 		fs.setIf(edx&(1<<14) != 0, SERIALIZE)
 		fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
+		fs.setIf(edx&(1<<18) != 0, PCONFIG)
+		fs.setIf(edx&(1<<20) != 0, CETIBT)
 		fs.setIf(edx&(1<<26) != 0, IBPB)
 		fs.setIf(edx&(1<<27) != 0, STIBP)
 
+		// CPUID.(EAX=7, ECX=1)
+		eax1, _, _, _ := cpuidex(7, 1)
+		fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
+		fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
+		fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
+		fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
+		fs.setIf(eax1&(1<<22) != 0, HRESET)
+		fs.setIf(eax1&(1<<26) != 0, LAM)
+
 		// Only detect AVX-512 features if XGETBV is supported
 		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
 			// Check for OS support
@@ -940,15 +1134,13 @@ func support() flagSet {
 				// ecx
 				fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
 				fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
-				fs.setIf(ecx&(1<<8) != 0, GFNI)
-				fs.setIf(ecx&(1<<9) != 0, VAES)
-				fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
 				fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
 				fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
 				fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
 				// edx
 				fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
 				fs.setIf(edx&(1<<22) != 0, AMXBF16)
+				fs.setIf(edx&(1<<23) != 0, AVX512FP16)
 				fs.setIf(edx&(1<<24) != 0, AMXTILE)
 				fs.setIf(edx&(1<<25) != 0, AMXINT8)
 				// eax1 = CPUID.(EAX=7, ECX=1).EAX
@@ -956,33 +1148,91 @@ func support() flagSet {
 			}
 		}
 	}
-
+	// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
+	// EAX
+	// Bit 00: XSAVEOPT is available.
+	// Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
+	// Bit 02: Supports XGETBV with ECX = 1 if set.
+	// Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
+	// Bits 31 - 04: Reserved.
+	// EBX
+	// Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS.
+	// ECX
+	// Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
+	// EDX?
+	// Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
+	if mfi >= 0xd {
+		if fs.inSet(XSAVE) {
+			eax, _, _, _ := cpuidex(0xd, 1)
+			fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
+			fs.setIf(eax&(1<<1) != 0, XSAVEC)
+			fs.setIf(eax&(1<<2) != 0, XGETBV1)
+			fs.setIf(eax&(1<<3) != 0, XSAVES)
+		}
+	}
 	if maxExtendedFunction() >= 0x80000001 {
 		_, _, c, d := cpuid(0x80000001)
 		if (c & (1 << 5)) != 0 {
 			fs.set(LZCNT)
 			fs.set(POPCNT)
 		}
-		fs.setIf((c&(1<<10)) != 0, IBS)
-		fs.setIf((d&(1<<31)) != 0, AMD3DNOW)
-		fs.setIf((d&(1<<30)) != 0, AMD3DNOWEXT)
-		fs.setIf((d&(1<<23)) != 0, MMX)
-		fs.setIf((d&(1<<22)) != 0, MMXEXT)
+		// ECX
+		fs.setIf((c&(1<<0)) != 0, LAHF)
+		fs.setIf((c&(1<<2)) != 0, SVM)
 		fs.setIf((c&(1<<6)) != 0, SSE4A)
+		fs.setIf((c&(1<<10)) != 0, IBS)
+		fs.setIf((c&(1<<22)) != 0, TOPEXT)
+
+		// EDX
+		fs.setIf(d&(1<<11) != 0, SYSCALL)
 		fs.setIf(d&(1<<20) != 0, NX)
+		fs.setIf(d&(1<<22) != 0, MMXEXT)
+		fs.setIf(d&(1<<23) != 0, MMX)
+		fs.setIf(d&(1<<24) != 0, FXSR)
+		fs.setIf(d&(1<<25) != 0, FXSROPT)
 		fs.setIf(d&(1<<27) != 0, RDTSCP)
+		fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
+		fs.setIf(d&(1<<31) != 0, AMD3DNOW)
 
 		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
 		 * used unless the OS has AVX support. */
 		if fs.inSet(AVX) {
-			fs.setIf((c&0x00000800) != 0, XOP)
-			fs.setIf((c&0x00010000) != 0, FMA4)
+			fs.setIf((c&(1<<11)) != 0, XOP)
+			fs.setIf((c&(1<<16)) != 0, FMA4)
 		}
 
 	}
+	if maxExtendedFunction() >= 0x80000007 {
+		_, b, _, d := cpuid(0x80000007)
+		fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
+		fs.setIf((b&(1<<1)) != 0, SUCCOR)
+		fs.setIf((b&(1<<2)) != 0, HWA)
+		fs.setIf((d&(1<<9)) != 0, CPBOOST)
+	}
+
 	if maxExtendedFunction() >= 0x80000008 {
 		_, b, _, _ := cpuid(0x80000008)
 		fs.setIf((b&(1<<9)) != 0, WBNOINVD)
+		fs.setIf((b&(1<<8)) != 0, MCOMMIT)
+		fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
+		fs.setIf((b&(1<<4)) != 0, RDPRU)
+		fs.setIf((b&(1<<3)) != 0, INVLPGB)
+		fs.setIf((b&(1<<1)) != 0, MSRIRC)
+		fs.setIf((b&(1<<0)) != 0, CLZERO)
+	}
+
+	if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
+		_, _, _, edx := cpuid(0x8000000A)
+		fs.setIf((edx>>0)&1 == 1, SVMNP)
+		fs.setIf((edx>>1)&1 == 1, LBRVIRT)
+		fs.setIf((edx>>2)&1 == 1, SVML)
+		fs.setIf((edx>>3)&1 == 1, NRIPS)
+		fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
+		fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
+		fs.setIf((edx>>6)&1 == 1, SVMFBASID)
+		fs.setIf((edx>>7)&1 == 1, SVMDA)
+		fs.setIf((edx>>10)&1 == 1, SVMPF)
+		fs.setIf((edx>>12)&1 == 1, SVMPFT)
 	}
 
 	if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
@@ -997,6 +1247,24 @@ func support() flagSet {
 		fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
 	}
 
+	if maxExtendedFunction() >= 0x8000001f && vend == AMD {
+		a, _, _, _ := cpuid(0x8000001f)
+		fs.setIf((a>>0)&1 == 1, SME)
+		fs.setIf((a>>1)&1 == 1, SEV)
+		fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
+		fs.setIf((a>>3)&1 == 1, SEV_ES)
+		fs.setIf((a>>4)&1 == 1, SEV_SNP)
+		fs.setIf((a>>5)&1 == 1, VMPL)
+		fs.setIf((a>>10)&1 == 1, SME_COHERENT)
+		fs.setIf((a>>11)&1 == 1, SEV_64BIT)
+		fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
+		fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
+		fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
+		fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
+		fs.setIf((a>>16)&1 == 1, VTE)
+		fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
+	}
+
 	return fs
 }
 

+ 2 - 1
vendor/github.com/klauspost/cpuid/v2/detect_arm64.go

@@ -1,6 +1,7 @@
 // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
 
-//+build arm64,!gccgo,!noasm,!appengine
+//go:build arm64 && !gccgo && !noasm && !appengine
+// +build arm64,!gccgo,!noasm,!appengine
 
 package cpuid
 

+ 2 - 1
vendor/github.com/klauspost/cpuid/v2/detect_ref.go

@@ -1,6 +1,7 @@
 // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
 
-//+build !amd64,!386,!arm64 gccgo noasm appengine
+//go:build (!amd64 && !386 && !arm64) || gccgo || noasm || appengine
+// +build !amd64,!386,!arm64 gccgo noasm appengine
 
 package cpuid
 

+ 4 - 3
vendor/github.com/klauspost/cpuid/v2/detect_x86.go

@@ -1,6 +1,7 @@
 // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
 
-//+build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine
+//go:build (386 && !gccgo && !noasm && !appengine) || (amd64 && !gccgo && !noasm && !appengine)
+// +build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine
 
 package cpuid
 
@@ -23,13 +24,13 @@ func addInfo(c *CPUInfo, safe bool) {
 	c.maxExFunc = maxExtendedFunction()
 	c.BrandName = brandName()
 	c.CacheLine = cacheLine()
-	c.Family, c.Model = familyModel()
+	c.Family, c.Model, c.Stepping = familyModel()
 	c.featureSet = support()
 	c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
 	c.ThreadsPerCore = threadsPerCore()
 	c.LogicalCores = logicalCores()
 	c.PhysicalCores = physicalCores()
 	c.VendorID, c.VendorString = vendorID()
-	c.Hz = hertz(c.BrandName)
 	c.cacheSize()
+	c.frequencies()
 }

File diff suppressed because it is too large
+ 153 - 91
vendor/github.com/klauspost/cpuid/v2/featureid_string.go


+ 107 - 5
vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go

@@ -2,18 +2,120 @@
 
 package cpuid
 
-import "runtime"
+import (
+	"runtime"
+	"strings"
+
+	"golang.org/x/sys/unix"
+)
 
 func detectOS(c *CPUInfo) bool {
+	if runtime.GOOS != "ios" {
+		tryToFillCPUInfoFomSysctl(c)
+	}
 	// There are no hw.optional sysctl values for the below features on Mac OS 11.0
 	// to detect their supported state dynamically. Assume the CPU features that
 	// Apple Silicon M1 supports to be available as a minimal set of features
 	// to all Go programs running on darwin/arm64.
 	// TODO: Add more if we know them.
 	c.featureSet.setIf(runtime.GOOS != "ios", AESARM, PMULL, SHA1, SHA2)
-	c.PhysicalCores = runtime.NumCPU()
-	// For now assuming 1 thread per core...
-	c.ThreadsPerCore = 1
-	c.LogicalCores = c.PhysicalCores
+
 	return true
 }
+
+func sysctlGetBool(name string) bool {
+	value, err := unix.SysctlUint32(name)
+	if err != nil {
+		return false
+	}
+	return value != 0
+}
+
+func sysctlGetString(name string) string {
+	value, err := unix.Sysctl(name)
+	if err != nil {
+		return ""
+	}
+	return value
+}
+
+func sysctlGetInt(unknown int, names ...string) int {
+	for _, name := range names {
+		value, err := unix.SysctlUint32(name)
+		if err != nil {
+			continue
+		}
+		if value != 0 {
+			return int(value)
+		}
+	}
+	return unknown
+}
+
+func sysctlGetInt64(unknown int, names ...string) int {
+	for _, name := range names {
+		value64, err := unix.SysctlUint64(name)
+		if err != nil {
+			continue
+		}
+		if int(value64) != unknown {
+			return int(value64)
+		}
+	}
+	return unknown
+}
+
+func setFeature(c *CPUInfo, name string, feature FeatureID) {
+	c.featureSet.setIf(sysctlGetBool(name), feature)
+}
+func tryToFillCPUInfoFomSysctl(c *CPUInfo) {
+	c.BrandName = sysctlGetString("machdep.cpu.brand_string")
+
+	if len(c.BrandName) != 0 {
+		c.VendorString = strings.Fields(c.BrandName)[0]
+	}
+
+	c.PhysicalCores = sysctlGetInt(runtime.NumCPU(), "hw.physicalcpu")
+	c.ThreadsPerCore = sysctlGetInt(1, "machdep.cpu.thread_count", "kern.num_threads") /
+		sysctlGetInt(1, "hw.physicalcpu")
+	c.LogicalCores = sysctlGetInt(runtime.NumCPU(), "machdep.cpu.core_count")
+	c.Family = sysctlGetInt(0, "machdep.cpu.family", "hw.cpufamily")
+	c.Model = sysctlGetInt(0, "machdep.cpu.model")
+	c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize")
+	c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize")
+	c.Cache.L1D = sysctlGetInt64(-1, "hw.l1icachesize")
+	c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize")
+	c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize")
+
+	// from https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile
+	setFeature(c, "hw.optional.arm.FEAT_AES", AESARM)
+	setFeature(c, "hw.optional.AdvSIMD", ASIMD)
+	setFeature(c, "hw.optional.arm.FEAT_DotProd", ASIMDDP)
+	setFeature(c, "hw.optional.arm.FEAT_RDM", ASIMDRDM)
+	setFeature(c, "hw.optional.FEAT_CRC32", CRC32)
+	setFeature(c, "hw.optional.arm.FEAT_DPB", DCPOP)
+	// setFeature(c, "", EVTSTRM)
+	setFeature(c, "hw.optional.arm.FEAT_FCMA", FCMA)
+	setFeature(c, "hw.optional.arm.FEAT_FP", FP)
+	setFeature(c, "hw.optional.arm.FEAT_FP16", FPHP)
+	setFeature(c, "hw.optional.arm.FEAT_PAuth", GPA)
+	setFeature(c, "hw.optional.arm.FEAT_JSCVT", JSCVT)
+	setFeature(c, "hw.optional.arm.FEAT_LRCPC", LRCPC)
+	setFeature(c, "hw.optional.arm.FEAT_PMULL", PMULL)
+	setFeature(c, "hw.optional.arm.FEAT_SHA1", SHA1)
+	setFeature(c, "hw.optional.arm.FEAT_SHA256", SHA2)
+	setFeature(c, "hw.optional.arm.FEAT_SHA3", SHA3)
+	setFeature(c, "hw.optional.arm.FEAT_SHA512", SHA512)
+	// setFeature(c, "", SM3)
+	// setFeature(c, "", SM4)
+	setFeature(c, "hw.optional.arm.FEAT_SVE", SVE)
+
+	// from empirical observation
+	setFeature(c, "hw.optional.AdvSIMD_HPFPCvt", ASIMDHP)
+	setFeature(c, "hw.optional.armv8_1_atomics", ATOMICS)
+	setFeature(c, "hw.optional.floatingpoint", FP)
+	setFeature(c, "hw.optional.armv8_2_sha3", SHA3)
+	setFeature(c, "hw.optional.armv8_2_sha512", SHA512)
+	setFeature(c, "hw.optional.armv8_3_compnum", FCMA)
+	setFeature(c, "hw.optional.armv8_crc32", CRC32)
+}

+ 2 - 3
vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go

@@ -1,8 +1,7 @@
 // Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
 
-// +build arm64
-// +build !linux
-// +build !darwin
+//go:build arm64 && !linux && !darwin
+// +build arm64,!linux,!darwin
 
 package cpuid
 

+ 2 - 1
vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go

@@ -1,6 +1,7 @@
 // Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
 
-//+build nounsafe
+//go:build nounsafe
+// +build nounsafe
 
 package cpuid
 

+ 2 - 1
vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go

@@ -1,6 +1,7 @@
 // Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
 
-//+build !nounsafe
+//go:build !nounsafe
+// +build !nounsafe
 
 package cpuid
 

+ 0 - 65
vendor/github.com/klauspost/reedsolomon/.travis.yml

@@ -1,65 +0,0 @@
-language: go
-
-os:
-  - linux
-  - osx
-  - windows
-
-arch:
-  - amd64
-  - arm64
-  - ppc64le
-  - s390x
-
-go:
-  - 1.14.x
-  - 1.15.x
-  - 1.16.x
-  - master
-
-env:
-  - GO111MODULE=off CGO_ENABLED=0
-
-install:
- - go get ./...
-
-script:
- - go vet ./...
- - go test -cpu=1,2 .
- - go test -tags=noasm -cpu=1,2 .
- - go build examples/simple-decoder.go
- - go build examples/simple-encoder.go
- - go build examples/stream-decoder.go
- - go build examples/stream-encoder.go
-
-jobs:
-  allow_failures:
-    - go: 'master'
-    - arch: s390x
-  fast_finish: true
-  include:
-    - stage: other
-      go: 1.16.x
-      os: linux
-      arch: amd64
-      script:
-        - diff <(gofmt -d .) <(printf "")
-        - diff <(gofmt -d ./examples) <(printf "")
-        - go get github.com/klauspost/asmfmt&&go install github.com/klauspost/asmfmt/cmd/asmfmt
-        - diff <(asmfmt -d .) <(printf "")
-        - CGO_ENABLED=1 go test -cpu=1 -short -race .
-        - CGO_ENABLED=1 go test -cpu=2 -short -race .
-        - CGO_ENABLED=1 go test -tags=noasm -cpu=1 -short -race .
-        - CGO_ENABLED=1 go test -tags=noasm -cpu=4 -short -race .
-        - CGO_ENABLED=1 go test -no-avx512 -short -race .
-        - CGO_ENABLED=1 go test -no-avx512 -no-avx2 -short -race .
-        - CGO_ENABLED=1 go test -no-avx512 -no-avx2 -no-ssse3 -short -race .
-        - GOOS=linux GOARCH=386 go test -short .
-    - stage: other
-      go: 1.15.x
-      os: linux
-      arch: amd64
-      script:
-        - go test -no-avx512
-        - go test -no-avx512 -no-avx2
-        - go test -no-avx512 -no-avx2 -no-ssse3

+ 114 - 45
vendor/github.com/klauspost/reedsolomon/README.md

@@ -1,8 +1,5 @@
 # Reed-Solomon
-[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/reedsolomon.svg)](https://pkg.go.dev/github.com/klauspost/reedsolomon) [![Build Status][3]][4]
-
-[3]: https://travis-ci.org/klauspost/reedsolomon.svg?branch=master
-[4]: https://travis-ci.org/klauspost/reedsolomon
+[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/reedsolomon.svg)](https://pkg.go.dev/github.com/klauspost/reedsolomon) [![Go](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml)
 
 Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go.
 
@@ -11,9 +8,12 @@ This is a Go port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReed
 
 For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/).
 
+For encoding high shard counts (>256) a Leopard implementation is used.
+For most platforms this performs close to the original Leopard implementation in terms of speed. 
+
 Package home: https://github.com/klauspost/reedsolomon
 
-Godoc: https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc
+Godoc: https://pkg.go.dev/github.com/klauspost/reedsolomon
 
 # Installation
 To get the package use the standard:
@@ -21,11 +21,18 @@ To get the package use the standard:
 go get -u github.com/klauspost/reedsolomon
 ```
 
-Using Go modules recommended.
+Using Go modules is recommended.
 
 # Changes
+
+## 2022
+
+* Leopard GF16 mode added, for up to 63336 shards. 
+* [WithJerasureMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithJerasureMatrix) allows constructing a [Jerasure](https://github.com/tsuraan/Jerasure) compatible matrix.
+
 ## 2021
 
+* Use `GOAMD64=v4` to enable faster AVX2.
 * Add progressive shard encoding.
 * Wider AVX2 loops
 * Limit concurrency on AVX2, since we are likely memory bound.
@@ -33,6 +40,8 @@ Using Go modules recommended.
 * Allow disabling inversion cache.
 * Faster AVX2 encoding.
 
+<details>
+	<summary>See older changes</summary>
 
 ## May 2020
 
@@ -96,6 +105,8 @@ The [`StreamEncoder`](https://godoc.org/github.com/klauspost/reedsolomon#StreamE
 handles this without modifying the interface. 
 This is a good lesson on why returning interfaces is not a good design.
 
+</details>
+
 # Usage
 
 This section assumes you know the basics of Reed-Solomon encoding. 
@@ -105,23 +116,19 @@ This package performs the calculation of the parity sets. The usage is therefore
 
 First of all, you need to choose your distribution of data and parity shards. 
 A 'good' distribution is very subjective, and will depend a lot on your usage scenario. 
-A good starting point is above 5 and below 257 data shards (the maximum supported number), 
-and the number of parity shards to be 2 or above, and below the number of data shards.
 
 To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated):
 ```Go
     enc, err := reedsolomon.New(10, 3)
 ```
 This encoder will work for all parity sets with this distribution of data and parity shards. 
-The error will only be set if you specify 0 or negative values in any of the parameters, 
-or if you specify more than 256 data shards.
 
 If you will primarily be using it with one shard size it is recommended to use 
 [`WithAutoGoroutines(shardSize)`](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithAutoGoroutines)
 as an additional parameter. This will attempt to calculate the optimal number of goroutines to use for the best speed.
 It is not required that all shards are this size. 
 
-The you send and receive data  is a simple slice of byte slices; `[][]byte`. 
+Then you send and receive data that is a simple slice of byte slices; `[][]byte`. 
 In the example above, the top slice must have a length of 13.
 
 ```Go
@@ -187,6 +194,17 @@ If you are only interested in the data shards (for reading purposes) you can cal
     err := enc.ReconstructData(data)
 ```
 
+If you don't need all data shards you can use `ReconstructSome()`:
+
+```Go
+    // Delete two data shards
+    data[3] = nil
+    data[7] = nil
+    
+    // Reconstruct just the shard 3
+    err := enc.ReconstructSome(data, []bool{false, false, false, true, false, false, false, false})
+```
+
 So to sum up reconstruction:
 * The number of data/parity shards must match the numbers used for encoding.
 * The order of shards must be the same as used when encoding.
@@ -333,6 +351,8 @@ There is no buffering or timeouts/retry specified. If you want to add that, you
 For complete examples of a streaming encoder and decoder see the 
 [examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
 
+GF16 (more than 256 shards) is not supported by the streaming interface. 
+
 # Advanced Options
 
 You can modify internal options which affects how jobs are split between and processed by goroutines.
@@ -346,8 +366,83 @@ Example of how to supply options:
      enc, err := reedsolomon.New(10, 3, WithMaxGoroutines(25))
  ```
 
+# Leopard Compatible GF16
+
+When you encode more than 256 shards the library will switch to a [Leopard-RS](https://github.com/catid/leopard) implementation.
+
+This allows encoding up to 65536 shards (data+parity) with the following limitations, similar to leopard:
+
+* The original and recovery data must not exceed 65536 pieces.
+* The shard size *must*  each be a multiple of 64 bytes.
+* Each buffer should have the same number of bytes.
+* Even the last shard must be rounded up to the block size.
+
+|                 | Regular | Leopard |
+|-----------------|---------|---------|
+| Encode          | ✓       | ✓       |
+| EncodeIdx       | ✓       | -       |
+| Verify          | ✓       | ✓       |
+| Reconstruct     | ✓       | ✓       |
+| ReconstructData | ✓       | ✓       |
+| ReconstructSome | ✓       | ✓ (+)   |
+| Update          | ✓       | -       |
+| Split           | ✓       | ✓       |
+| Join            | ✓       | ✓       |
+
+* (+) Same as calling `ReconstructData`.
+
+The Split/Join functions will help to split an input to the proper sizes.
+
+Speed can be expected to be `O(N*log(N))`, compared to the `O(N*N)`. 
+Reconstruction matrix calculation is more time-consuming, 
+so be sure to include that as part of any benchmark you run.  
+
+For now SSSE3, AVX2 and AVX512 assembly are available on AMD64 platforms.
+
+Leopard mode currently always runs as a single goroutine, since multiple gorouties doesn't provide any worthwhile speedup.
+
+## Forcing Leopard GF16
+
+The `WithLeopardGF16(true)` can be used to use Leopard GF16 for all operations. 
+This is *not* compatible with the Leopard library that has a separate GF8 implementation.
+
+Benchmark Encoding and Reconstructing *1KB* shards with variable number of shards.
+For Cauchy matrix the inversion cache is disabled for a more "fair" test.
+Speed is total shard size for each operation. Data shard throughput is speed/2.
+AVX2 is used.
+
+| Encoder      | Shards      | Encode        | Recover All  | Recover One  |
+|--------------|-------------|---------------|--------------|--------------|
+| Cauchy       | 4+4         | 23076.83 MB/s | 3048.86 MB/s | 5620.84 MB/s |
+| Cauchy       | 8+8         | 15206.87 MB/s | 3041.99 MB/s | 7173.71 MB/s |
+| Cauchy       | 16+16       | 7427.47 MB/s  | 1384.58 MB/s | 6343.85 MB/s |
+| Cauchy       | 32+32       | 3785.64 MB/s  | 557.60 MB/s  | 4660.27 MB/s |
+| Cauchy       | 64+64       | 1911.93 MB/s  | 160.54 MB/s  | 2864.63 MB/s |
+| Cauchy       | 128+128     | 963.83 MB/s   | 42.81 MB/s   | 1597.93 MB/s |
+| Leopard GF16 | 4+4         | 18468.32 MB/s | 10.45 MB/s   | 10.30 MB/s   |
+| Leopard GF16 | 8+8         | 10293.79 MB/s | 20.83 MB/s   | 20.51 MB/s   |
+| Leopard GF16 | 16+16       | 12386.04 MB/s | 40.80 MB/s   | 40.47 MB/s   |
+| Leopard GF16 | 32+32       | 7347.35 MB/s  | 81.15 MB/s   | 79.80 MB/s   |
+| Leopard GF16 | 64+64       | 8299.63 MB/s  | 150.47 MB/s  | 154.15 MB/s  |
+| Leopard GF16 | 128+128     | 5629.04 MB/s  | 278.84 MB/s  | 289.15 MB/s  |
+| Leopard GF16 | 256+256     | 6158.66 MB/s  | 454.14 MB/s  | 506.70 MB/s  |
+| Leopard GF16 | 512+512     | 4418.58 MB/s  | 685.75 MB/s  | 801.63 MB/s  |
+| Leopard GF16 | 1024+1024   | 4778.05 MB/s  | 814.51 MB/s  | 1080.19 MB/s |
+| Leopard GF16 | 2048+2048   | 3417.05 MB/s  | 911.64 MB/s  | 1179.48 MB/s |
+| Leopard GF16 | 4096+4096   | 3209.41 MB/s  | 729.13 MB/s  | 1135.06 MB/s |
+| Leopard GF16 | 8192+8192   | 2034.11 MB/s  | 604.52 MB/s  | 842.13 MB/s  |
+| Leopard GF16 | 16384+16384 | 1525.88 MB/s  | 486.74 MB/s  | 750.01 MB/s  |
+| Leopard GF16 | 32768+32768 | 1138.67 MB/s  | 482.81 MB/s  | 712.73 MB/s  |
+
+"Traditional" encoding is faster until somewhere between 16 and 32 shards.
+Leopard provides fast encoding in all cases, but shows a significant overhead for reconstruction.
+
+Calculating the reconstruction matrix takes a significant amount of computation. 
+With bigger shards that will be smaller. Arguably, fewer shards typically also means bigger shards.
+Due to the high shard count caching reconstruction matrices generally isn't feasible for Leopard. 
 
 # Performance
+
 Performance depends mainly on the number of parity shards. 
 In rough terms, doubling the number of parity shards will double the encoding time.
 
@@ -356,27 +451,16 @@ For reference each shard is 1MB random data, and 16 CPU cores are used for encod
 
 | Data | Parity | Go MB/s | SSSE3 MB/s | AVX2 MB/s |
 |------|--------|---------|------------|-----------|
-| 5    | 2      | 14287   | 66355      | 108755    |
-| 8    | 8      | 5569    | 34298      | 70516     |
-| 10   | 4      | 6766    | 48237      | 93875     |
-| 50   | 20     | 1540    | 12130      | 22090     |
+| 5    | 2      | 20,772  | 66,355     | 108,755   |
+| 8    | 8      | 6,815   | 38,338     | 70,516    |
+| 10   | 4      | 9,245   | 48,237     | 93,875    |
+| 50   | 20     | 2,063   | 12,130     | 22,828    |
 
 The throughput numbers here is the size of the encoded data and parity shards.
 
 If `runtime.GOMAXPROCS()` is set to a value higher than 1, 
 the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`.
 
-Example of performance scaling on AMD Ryzen 3950X - 16 physical cores, 32 logical cores, AVX 2.
-The example uses 10 blocks with 1MB data each and 4 parity blocks.
-
-| Threads | Speed      |
-|---------|------------|
-| 1       | 9979 MB/s  |
-| 2       | 18870 MB/s |
-| 4       | 33697 MB/s |
-| 8       | 51531 MB/s |
-| 16      | 59204 MB/s |
-
 
 Benchmarking `Reconstruct()` followed by a `Verify()` (=`all`) versus just calling `ReconstructData()` (=`data`) gives the following result:
 ```
@@ -390,22 +474,9 @@ BenchmarkReconstruct50x20x1M-8       1364.35      4189.79      3.07x
 BenchmarkReconstruct10x4x16M-8       1484.35      5779.53      3.89x
 ```
 
-# Performance on AVX512
+The performance on AVX512 has been accelerated for CPUs when available.
 
-The performance on AVX512 has been accelerated for Intel CPUs. 
-This gives speedups on a per-core basis typically up to 2x compared to 
-AVX2 as can be seen in the following table:
-
-```
-[...]
-```
-
-This speedup has been achieved by computing multiple parity blocks in parallel as opposed to one after the other. 
-In doing so it is possible to minimize the memory bandwidth required for loading all data shards. 
-At the same time the calculations are performed in the 512-bit wide ZMM registers and the surplus of ZMM 
-registers (32 in total) is used to keep more data around (most notably the matrix coefficients).
-
-# Performance on ARM64 NEON
+## ARM64 NEON
 
 By exploiting NEON instructions the performance for ARM has been accelerated. 
 Below are the performance numbers for a single core on an EC2 m6g.16xlarge (Graviton2) instance (Amazon Linux 2):
@@ -420,7 +491,7 @@ BenchmarkGaloisXor1M-64        10000    100322 ns/op        10452.13 MB/s
 # Performance on ppc64le
 
 The performance for ppc64le has been accelerated. 
-This gives roughly a 10x performance improvement on this architecture as can been seen below:
+This gives roughly a 10x performance improvement on this architecture as can be seen below:
 
 ```
 benchmark                      old MB/s     new MB/s     speedup
@@ -430,9 +501,6 @@ BenchmarkGaloisXor128K-160     862.02       7905.00      9.17x
 BenchmarkGaloisXor1M-160       784.60       6296.65      8.03x
 ```
 
-# asm2plan9s
-
-[asm2plan9s](https://github.com/fwessels/asm2plan9s) is used for assembling the AVX2 instructions into their BYTE/WORD/LONG equivalents.
 
 # Links
 * [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
@@ -443,6 +511,7 @@ BenchmarkGaloisXor1M-160       784.60       6296.65      8.03x
 * [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation.
 * [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
 * [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations.
+* [Leopard-RS](https://github.com/catid/leopard) C library used as basis for GF16 implementation.
 
 # License
 

+ 0 - 20
vendor/github.com/klauspost/reedsolomon/appveyor.yml

@@ -1,20 +0,0 @@
-os: Visual Studio 2015
-
-platform: x64
-
-clone_folder: c:\gopath\src\github.com\klauspost\reedsolomon
-
-# environment variables
-environment:
-  GOPATH: c:\gopath
-
-install:
-  - echo %PATH%
-  - echo %GOPATH%
-  - go version
-  - go env
-  - go get -d ./...
-
-build_script:
-  - go test -v -cpu=2 ./...
-  - go test -cpu=1,2,4 -short -race ./...

+ 23 - 0
vendor/github.com/klauspost/reedsolomon/galois.go

@@ -6,6 +6,8 @@
 
 package reedsolomon
 
+import "encoding/binary"
+
 const (
 	// The number of elements in the field.
 	fieldSize = 256
@@ -929,3 +931,24 @@ func genAvx2Matrix(matrixRows [][]byte, inputs, inIdx, outputs int, dst []byte)
 	}
 	return dst
 }
+
+// xor slices writing to out.
+func sliceXorGo(in, out []byte, _ *options) {
+	for len(out) >= 32 {
+		inS := in[:32]
+		v0 := binary.LittleEndian.Uint64(out[:8]) ^ binary.LittleEndian.Uint64(inS[:8])
+		v1 := binary.LittleEndian.Uint64(out[8:16]) ^ binary.LittleEndian.Uint64(inS[8:16])
+		v2 := binary.LittleEndian.Uint64(out[16:24]) ^ binary.LittleEndian.Uint64(inS[16:24])
+		v3 := binary.LittleEndian.Uint64(out[24:32]) ^ binary.LittleEndian.Uint64(inS[24:32])
+		binary.LittleEndian.PutUint64(out[:8], v0)
+		binary.LittleEndian.PutUint64(out[8:16], v1)
+		binary.LittleEndian.PutUint64(out[16:24], v2)
+		binary.LittleEndian.PutUint64(out[24:32], v3)
+		out = out[32:]
+		in = in[32:]
+	}
+	out = out[:len(in)]
+	for n, input := range in {
+		out[n] ^= input
+	}
+}

+ 3 - 3
vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go

@@ -104,7 +104,7 @@ func setupMatrix84(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[
 // Invoke AVX512 routine for single output row in parallel
 func galMulAVX512Parallel81(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix81 *[matrixSize81]byte) {
 	done := stop - start
-	if done <= 0 {
+	if done <= 0 || len(in) == 0 || len(out) == 0 {
 		return
 	}
 
@@ -139,7 +139,7 @@ func galMulAVX512Parallel81(in, out [][]byte, matrixRows [][]byte, inputOffset,
 // Invoke AVX512 routine for 2 output rows in parallel
 func galMulAVX512Parallel82(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix82 *[matrixSize82]byte) {
 	done := stop - start
-	if done <= 0 {
+	if done <= 0 || len(in) == 0 || len(out) == 0 {
 		return
 	}
 
@@ -174,7 +174,7 @@ func galMulAVX512Parallel82(in, out [][]byte, matrixRows [][]byte, inputOffset,
 // Invoke AVX512 routine for 4 output rows in parallel
 func galMulAVX512Parallel84(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix84 *[matrixSize84]byte) {
 	done := stop - start
-	if done <= 0 {
+	if done <= 0 || len(in) == 0 || len(out) == 0 {
 		return
 	}
 

+ 206 - 0
vendor/github.com/klauspost/reedsolomon/galois_amd64.go

@@ -132,9 +132,215 @@ func sliceXor(in, out []byte, o *options) {
 			in = in[done:]
 			out = out[done:]
 		}
+	} else {
+		sliceXorGo(in, out, o)
+		return
 	}
 	out = out[:len(in)]
 	for i := range in {
 		out[i] ^= in[i]
 	}
 }
+
+// 4-way butterfly
+func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	if len(work[0]) == 0 {
+		return
+	}
+
+	t01 := &multiply256LUT[log_m01]
+	t23 := &multiply256LUT[log_m23]
+	t02 := &multiply256LUT[log_m02]
+	if o.useAVX512 {
+		if log_m01 == modulus {
+			if log_m23 == modulus {
+				if log_m02 == modulus {
+					ifftDIT4_avx512_7(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx512_3(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m02 == modulus {
+					ifftDIT4_avx512_5(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx512_1(work, dist*24, t01, t23, t02)
+				}
+			}
+		} else {
+			if log_m23 == modulus {
+				if log_m02 == modulus {
+					ifftDIT4_avx512_6(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx512_2(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m02 == modulus {
+					ifftDIT4_avx512_4(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx512_0(work, dist*24, t01, t23, t02)
+				}
+			}
+		}
+		return
+	} else if o.useAVX2 {
+		if log_m01 == modulus {
+			if log_m23 == modulus {
+				if log_m02 == modulus {
+					ifftDIT4_avx2_7(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx2_3(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m02 == modulus {
+					ifftDIT4_avx2_5(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx2_1(work, dist*24, t01, t23, t02)
+				}
+			}
+		} else {
+			if log_m23 == modulus {
+				if log_m02 == modulus {
+					ifftDIT4_avx2_6(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx2_2(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m02 == modulus {
+					ifftDIT4_avx2_4(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx2_0(work, dist*24, t01, t23, t02)
+				}
+			}
+		}
+		return
+	}
+	ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	if len(work[0]) == 0 {
+		return
+	}
+
+	t01 := &multiply256LUT[log_m01]
+	t23 := &multiply256LUT[log_m23]
+	t02 := &multiply256LUT[log_m02]
+	if o.useAVX512 {
+		if log_m02 == modulus {
+			if log_m01 == modulus {
+				if log_m23 == modulus {
+					fftDIT4_avx512_7(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx512_3(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m23 == modulus {
+					fftDIT4_avx512_5(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx512_1(work, dist*24, t01, t23, t02)
+				}
+			}
+		} else {
+			if log_m01 == modulus {
+				if log_m23 == modulus {
+					fftDIT4_avx512_6(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx512_2(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m23 == modulus {
+					fftDIT4_avx512_4(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx512_0(work, dist*24, t01, t23, t02)
+				}
+			}
+		}
+		return
+	} else if o.useAVX2 {
+		if log_m02 == modulus {
+			if log_m01 == modulus {
+				if log_m23 == modulus {
+					fftDIT4_avx2_7(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx2_3(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m23 == modulus {
+					fftDIT4_avx2_5(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx2_1(work, dist*24, t01, t23, t02)
+				}
+			}
+		} else {
+			if log_m01 == modulus {
+				if log_m23 == modulus {
+					fftDIT4_avx2_6(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx2_2(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m23 == modulus {
+					fftDIT4_avx2_4(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx2_0(work, dist*24, t01, t23, t02)
+				}
+			}
+		}
+		return
+	}
+	fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 2-way butterfly forward
+func fftDIT2(x, y []byte, log_m ffe, o *options) {
+	if o.useAVX2 {
+		if len(x) > 0 {
+			tmp := &multiply256LUT[log_m]
+			fftDIT2_avx2(x, y, tmp)
+		}
+	} else if o.useSSSE3 {
+		if len(x) > 0 {
+			tmp := &multiply256LUT[log_m]
+			fftDIT2_ssse3(x, y, tmp)
+		}
+	} else {
+		// Reference version:
+		refMulAdd(x, y, log_m)
+		sliceXor(x, y, o)
+	}
+}
+
+// 2-way butterfly
+func ifftDIT2(x, y []byte, log_m ffe, o *options) {
+	if o.useAVX2 {
+		if len(x) > 0 {
+			tmp := &multiply256LUT[log_m]
+			ifftDIT2_avx2(x, y, tmp)
+		}
+	} else if o.useSSSE3 {
+		if len(x) > 0 {
+			tmp := &multiply256LUT[log_m]
+			ifftDIT2_ssse3(x, y, tmp)
+		}
+	} else {
+		// Reference version:
+		sliceXor(x, y, o)
+		refMulAdd(x, y, log_m)
+	}
+}
+
+func mulgf16(x, y []byte, log_m ffe, o *options) {
+	if o.useAVX2 {
+		if len(x) > 0 {
+			tmp := &multiply256LUT[log_m]
+			mulgf16_avx2(x, y, tmp)
+		}
+	} else if o.useSSSE3 {
+		if len(x) > 0 {
+			tmp := &multiply256LUT[log_m]
+			mulgf16_ssse3(x, y, tmp)
+		}
+	} else {
+		refMul(x, y, log_m)
+	}
+}

+ 30 - 0
vendor/github.com/klauspost/reedsolomon/galois_arm64.go

@@ -64,3 +64,33 @@ func sliceXor(in, out []byte, o *options) {
 		}
 	}
 }
+
+// 4-way butterfly
+func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 2-way butterfly forward
+func fftDIT2(x, y []byte, log_m ffe, o *options) {
+	// Reference version:
+	refMulAdd(x, y, log_m)
+	// 64 byte aligned, always full.
+	galXorNEON(x, y)
+}
+
+// 2-way butterfly
+func ifftDIT2(x, y []byte, log_m ffe, o *options) {
+	// 64 byte aligned, always full.
+	galXorNEON(x, y)
+	// Reference version:
+	refMulAdd(x, y, log_m)
+}
+
+func mulgf16(x, y []byte, log_m ffe, o *options) {
+	refMul(x, y, log_m)
+}

+ 376 - 1
vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go

@@ -1,1176 +1,1551 @@
 // Code generated by command: go run gen.go -out ../galois_gen_amd64.s -stubs ../galois_gen_amd64.go -pkg=reedsolomon. DO NOT EDIT.
 
 //go:build !appengine && !noasm && !nogen && gc
-// +build !appengine,!noasm,!nogen,gc
 
 package reedsolomon
 
+func _dummy_()
+
 // mulAvxTwo_1x1 takes 1 inputs and produces 1 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x1_64 takes 1 inputs and produces 1 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_1x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x1Xor takes 1 inputs and produces 1 outputs.
+//
 //go:noescape
 func mulAvxTwo_1x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x1_64Xor takes 1 inputs and produces 1 outputs.
+//
 //go:noescape
 func mulAvxTwo_1x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x2 takes 1 inputs and produces 2 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x2_64 takes 1 inputs and produces 2 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_1x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x2Xor takes 1 inputs and produces 2 outputs.
+//
 //go:noescape
 func mulAvxTwo_1x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x2_64Xor takes 1 inputs and produces 2 outputs.
+//
 //go:noescape
 func mulAvxTwo_1x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x3 takes 1 inputs and produces 3 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x3_64 takes 1 inputs and produces 3 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_1x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x3Xor takes 1 inputs and produces 3 outputs.
+//
 //go:noescape
 func mulAvxTwo_1x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x3_64Xor takes 1 inputs and produces 3 outputs.
+//
 //go:noescape
 func mulAvxTwo_1x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x4 takes 1 inputs and produces 4 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x4Xor takes 1 inputs and produces 4 outputs.
+//
 //go:noescape
 func mulAvxTwo_1x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x5 takes 1 inputs and produces 5 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x5Xor takes 1 inputs and produces 5 outputs.
+//
 //go:noescape
 func mulAvxTwo_1x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x6 takes 1 inputs and produces 6 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x6Xor takes 1 inputs and produces 6 outputs.
+//
 //go:noescape
 func mulAvxTwo_1x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x7 takes 1 inputs and produces 7 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x7Xor takes 1 inputs and produces 7 outputs.
+//
 //go:noescape
 func mulAvxTwo_1x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x8 takes 1 inputs and produces 8 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x8Xor takes 1 inputs and produces 8 outputs.
+//
 //go:noescape
 func mulAvxTwo_1x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x9 takes 1 inputs and produces 9 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_1x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x9Xor takes 1 inputs and produces 9 outputs.
+//
 //go:noescape
 func mulAvxTwo_1x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x10 takes 1 inputs and produces 10 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_1x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_1x10Xor takes 1 inputs and produces 10 outputs.
+//
 //go:noescape
 func mulAvxTwo_1x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x1 takes 2 inputs and produces 1 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x1_64 takes 2 inputs and produces 1 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_2x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x1Xor takes 2 inputs and produces 1 outputs.
+//
 //go:noescape
 func mulAvxTwo_2x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x1_64Xor takes 2 inputs and produces 1 outputs.
+//
 //go:noescape
 func mulAvxTwo_2x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x2 takes 2 inputs and produces 2 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x2_64 takes 2 inputs and produces 2 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_2x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x2Xor takes 2 inputs and produces 2 outputs.
+//
 //go:noescape
 func mulAvxTwo_2x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x2_64Xor takes 2 inputs and produces 2 outputs.
+//
 //go:noescape
 func mulAvxTwo_2x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x3 takes 2 inputs and produces 3 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x3_64 takes 2 inputs and produces 3 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_2x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x3Xor takes 2 inputs and produces 3 outputs.
+//
 //go:noescape
 func mulAvxTwo_2x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x3_64Xor takes 2 inputs and produces 3 outputs.
+//
 //go:noescape
 func mulAvxTwo_2x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x4 takes 2 inputs and produces 4 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x4Xor takes 2 inputs and produces 4 outputs.
+//
 //go:noescape
 func mulAvxTwo_2x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x5 takes 2 inputs and produces 5 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x5Xor takes 2 inputs and produces 5 outputs.
+//
 //go:noescape
 func mulAvxTwo_2x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x6 takes 2 inputs and produces 6 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x6Xor takes 2 inputs and produces 6 outputs.
+//
 //go:noescape
 func mulAvxTwo_2x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x7 takes 2 inputs and produces 7 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x7Xor takes 2 inputs and produces 7 outputs.
+//
 //go:noescape
 func mulAvxTwo_2x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x8 takes 2 inputs and produces 8 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x8Xor takes 2 inputs and produces 8 outputs.
+//
 //go:noescape
 func mulAvxTwo_2x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x9 takes 2 inputs and produces 9 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_2x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x9Xor takes 2 inputs and produces 9 outputs.
+//
 //go:noescape
 func mulAvxTwo_2x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x10 takes 2 inputs and produces 10 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_2x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_2x10Xor takes 2 inputs and produces 10 outputs.
+//
 //go:noescape
 func mulAvxTwo_2x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x1 takes 3 inputs and produces 1 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x1_64 takes 3 inputs and produces 1 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_3x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x1Xor takes 3 inputs and produces 1 outputs.
+//
 //go:noescape
 func mulAvxTwo_3x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x1_64Xor takes 3 inputs and produces 1 outputs.
+//
 //go:noescape
 func mulAvxTwo_3x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x2 takes 3 inputs and produces 2 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x2_64 takes 3 inputs and produces 2 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_3x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x2Xor takes 3 inputs and produces 2 outputs.
+//
 //go:noescape
 func mulAvxTwo_3x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x2_64Xor takes 3 inputs and produces 2 outputs.
+//
 //go:noescape
 func mulAvxTwo_3x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x3 takes 3 inputs and produces 3 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x3_64 takes 3 inputs and produces 3 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_3x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x3Xor takes 3 inputs and produces 3 outputs.
+//
 //go:noescape
 func mulAvxTwo_3x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x3_64Xor takes 3 inputs and produces 3 outputs.
+//
 //go:noescape
 func mulAvxTwo_3x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x4 takes 3 inputs and produces 4 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x4Xor takes 3 inputs and produces 4 outputs.
+//
 //go:noescape
 func mulAvxTwo_3x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x5 takes 3 inputs and produces 5 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x5Xor takes 3 inputs and produces 5 outputs.
+//
 //go:noescape
 func mulAvxTwo_3x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x6 takes 3 inputs and produces 6 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x6Xor takes 3 inputs and produces 6 outputs.
+//
 //go:noescape
 func mulAvxTwo_3x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x7 takes 3 inputs and produces 7 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x7Xor takes 3 inputs and produces 7 outputs.
+//
 //go:noescape
 func mulAvxTwo_3x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x8 takes 3 inputs and produces 8 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x8Xor takes 3 inputs and produces 8 outputs.
+//
 //go:noescape
 func mulAvxTwo_3x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x9 takes 3 inputs and produces 9 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_3x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x9Xor takes 3 inputs and produces 9 outputs.
+//
 //go:noescape
 func mulAvxTwo_3x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x10 takes 3 inputs and produces 10 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_3x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_3x10Xor takes 3 inputs and produces 10 outputs.
+//
 //go:noescape
 func mulAvxTwo_3x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x1 takes 4 inputs and produces 1 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x1_64 takes 4 inputs and produces 1 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_4x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x1Xor takes 4 inputs and produces 1 outputs.
+//
 //go:noescape
 func mulAvxTwo_4x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x1_64Xor takes 4 inputs and produces 1 outputs.
+//
 //go:noescape
 func mulAvxTwo_4x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x2 takes 4 inputs and produces 2 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x2_64 takes 4 inputs and produces 2 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_4x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x2Xor takes 4 inputs and produces 2 outputs.
+//
 //go:noescape
 func mulAvxTwo_4x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x2_64Xor takes 4 inputs and produces 2 outputs.
+//
 //go:noescape
 func mulAvxTwo_4x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x3 takes 4 inputs and produces 3 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x3_64 takes 4 inputs and produces 3 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_4x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x3Xor takes 4 inputs and produces 3 outputs.
+//
 //go:noescape
 func mulAvxTwo_4x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x3_64Xor takes 4 inputs and produces 3 outputs.
+//
 //go:noescape
 func mulAvxTwo_4x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x4 takes 4 inputs and produces 4 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x4Xor takes 4 inputs and produces 4 outputs.
+//
 //go:noescape
 func mulAvxTwo_4x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x5 takes 4 inputs and produces 5 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x5Xor takes 4 inputs and produces 5 outputs.
+//
 //go:noescape
 func mulAvxTwo_4x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x6 takes 4 inputs and produces 6 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x6Xor takes 4 inputs and produces 6 outputs.
+//
 //go:noescape
 func mulAvxTwo_4x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x7 takes 4 inputs and produces 7 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x7Xor takes 4 inputs and produces 7 outputs.
+//
 //go:noescape
 func mulAvxTwo_4x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x8 takes 4 inputs and produces 8 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x8Xor takes 4 inputs and produces 8 outputs.
+//
 //go:noescape
 func mulAvxTwo_4x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x9 takes 4 inputs and produces 9 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_4x9(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x9Xor takes 4 inputs and produces 9 outputs.
+//
 //go:noescape
 func mulAvxTwo_4x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x10 takes 4 inputs and produces 10 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_4x10(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_4x10Xor takes 4 inputs and produces 10 outputs.
+//
 //go:noescape
 func mulAvxTwo_4x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int)
 
 // mulAvxTwo_5x1 takes 5 inputs and produces 1 outputs.
 // The output is initialized to 0.
+//
 //go:noescape
 func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)