OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_arch.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8// Copyright (c) 2026, Osamu Watanabe
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32//***************************************************************************/
33// This file is part of the OpenJPH software implementation.
34// File: ojph_arch.cpp
35// Author: Aous Naman
36// Date: 28 August 2019
37//***************************************************************************/
38
39#include <cassert>
40
41#include "ojph_arch.h"
42
43namespace ojph {
44
45#ifndef OJPH_DISABLE_SIMD
46
47 #if (defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
48
// Executes the CPUID instruction for leaf `eax` and sub-leaf `ecx`, and
// stores the resulting EAX, EBX, ECX and EDX values in abcd[0], abcd[1],
// abcd[2] and abcd[3] respectively.  `abcd` must point to at least four
// entries.  Always returns true.
//
// The inline-assembly path is borrowed from Intel; see for example
// https://software.intel.com/en-us/articles/
// how-to-detect-knl-instruction-support
bool run_cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd)
{
#ifdef OJPH_COMPILER_MSVC
  // The compiler intrinsic writes all four registers into the array.
  __cpuidex((int *)abcd, eax, ecx);
#else
  uint32_t reg_b = 0;
  uint32_t reg_d = 0;
  #if defined( __i386__ ) && defined ( __PIC__ )
  // In 32-bit PIC code EBX cannot be clobbered, so it is parked in EDI
  // around the instruction and swapped back afterwards; the EBX result of
  // CPUID is therefore delivered through EDI.
  __asm__ ( "movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi"
            : "=D" (reg_b), "+a" (eax), "+c" (ecx), "=d" (reg_d) );
  #else
  __asm__ ( "cpuid" : "+b" (reg_b), "+a" (eax), "+c" (ecx), "=d" (reg_d) );
  #endif
  abcd[0] = eax;
  abcd[1] = reg_b;
  abcd[2] = ecx;
  abcd[3] = reg_d;
#endif
  return true;
}
70
// Reads the extended control register selected by `index` using XGETBV and
// returns it as one 64-bit value: EDX supplies the upper 32 bits and EAX
// the lower 32 bits.
uint64_t read_xcr(uint32_t index)
{
#ifdef OJPH_COMPILER_MSVC
  return _xgetbv(index);
#else
  uint32_t low_half = 0;
  uint32_t high_half = 0;
  __asm__ ( "xgetbv" : "=a" (low_half), "=d" (high_half) : "c" (index) );

  uint64_t value = high_half;
  value <<= 32;
  value |= low_half;
  return value;
#endif
}
82
84 bool init_cpu_ext_level(int& level)
85 {
86 uint32_t mmx_abcd[4];
87 run_cpuid(1, 0, mmx_abcd);
88 bool mmx_avail = ((mmx_abcd[3] & 0x00800000) == 0x00800000);
89
90 level = 0;
91 if (mmx_avail)
92 {
94 bool sse_avail = ((mmx_abcd[3] & 0x02000000) == 0x02000000);
95 if (sse_avail)
96 {
98 bool sse2_avail = ((mmx_abcd[3] & 0x04000000) == 0x04000000);
99 if (sse2_avail)
100 {
102 bool sse3_avail = ((mmx_abcd[2] & 0x00000001) == 0x00000001);
103 if (sse3_avail)
104 {
106 bool ssse3_avail = ((mmx_abcd[2] & 0x00000200) == 0x00000200);
107 if (ssse3_avail)
108 {
110 bool sse41_avail = ((mmx_abcd[2] & 0x00080000) == 0x00080000);
111 if (sse41_avail) {
113 bool sse42_avail = ((mmx_abcd[2] & 0x00100000) == 0x00100000);
114 if (sse42_avail)
115 {
117
118 uint64_t xcr_val = 0;
119 bool osxsave_avail, ymm_avail, avx_avail = false;
120 osxsave_avail = ((mmx_abcd[2] & 0x08000000) == 0x08000000);
121 if (osxsave_avail)
122 {
123 xcr_val = read_xcr(0); // _XCR_XFEATURE_ENABLED_MASK = 0
124 ymm_avail = osxsave_avail && ((xcr_val & 0x6) == 0x6);
125 avx_avail = ymm_avail && (mmx_abcd[2] & 0x10000000);
126 }
127 if (avx_avail)
128 {
129 level = X86_CPU_EXT_LEVEL_AVX;
130
131 uint32_t avx2_abcd[4];
132 run_cpuid(7, 0, avx2_abcd);
133 bool avx2_avail = (avx2_abcd[1] & 0x20) != 0;
134 if (avx2_avail)
135 {
137 bool avx2fma_avail =
138 avx2_avail && ((mmx_abcd[2] & 0x1000) == 0x1000);
139 if (avx2fma_avail)
140 {
142
143 bool zmm_avail =
144 osxsave_avail && ((xcr_val & 0xE0) == 0xE0);
145 bool avx512f_avail = (avx2_abcd[1] & 0x10000) != 0;
146 bool avx512cd_avail = (avx2_abcd[1] & 0x10000000) != 0;
147 bool avx512bw_avail = (avx2_abcd[1] & 0x40000000) != 0;
148 bool avx512vl_avail =
149 (avx2_abcd[1] & 0x80000000u) != 0;
150 bool avx512_avail = zmm_avail && avx512f_avail
151 && avx512cd_avail && avx512bw_avail
152 && avx512vl_avail;
153 if (avx512_avail)
155 }
156 }
157 }
158 }
159 }
160 }
161 }
162 }
163 }
164 }
165 return true;
166 }
167 #elif defined(OJPH_ARCH_ARM)
168
169 #if !defined(OJPH_OS_LINUX) && !defined(OJPH_OS_FREEBSD) && !defined(OJPH_OS_OPENBSD) // Windows/Apple/Android
170
// CPU-extension probe for ARM builds that are not Linux, FreeBSD or OpenBSD
// (the enclosing preprocessor branch is annotated Windows/Apple/Android).
// No run-time detection is performed; the function always returns true.
//
// NOTE(review): as written, this function never assigns `level`.  A
// statement selecting one of the ARM_CPU_EXT_LEVEL_* values appears to be
// missing here -- confirm against the upstream file before relying on it.
bool init_cpu_ext_level(int& level)
{
  (void)level; // not written by this variant
  return true;
}
175
176 #else // Linux/FreeBSD/OpenBSD
177
178 #if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) // 64-bit ARM
179
180 #include <sys/auxv.h>
181 #ifdef OJPH_OS_LINUX
182 #include <asm/hwcap.h>
183 #endif
184
185 bool init_cpu_ext_level(int& level) {
186 #ifdef OJPH_OS_LINUX
187 unsigned long hwcaps = getauxval(AT_HWCAP);
188 unsigned long hwcaps2 = getauxval(AT_HWCAP2);
189 #else
190 unsigned long hwcaps = 0;
191 unsigned long hwcaps2 = 0;
192 elf_aux_info(AT_HWCAP, &hwcaps, sizeof(hwcaps));
193 elf_aux_info(AT_HWCAP2, &hwcaps2, sizeof(hwcaps2));
194 #endif
195
197 if (hwcaps & HWCAP_ASIMD) {
199 if (hwcaps & HWCAP_SVE) {
200 level = ARM_CPU_EXT_LEVEL_SVE;
201 if (hwcaps2 & HWCAP2_SVE2)
203 }
204 }
205 return true;
206 }
207
208 #else // 32-bit ARM
209
210 #include <sys/auxv.h>
211 #ifdef OJPH_OS_LINUX
212 #include <asm/hwcap.h>
213 #endif
214
215 bool init_cpu_ext_level(int& level) {
216 #ifdef OJPH_OS_LINUX
217 unsigned long hwcaps = getauxval(AT_HWCAP);
218 #else
219 unsigned long hwcaps = 0;
220 elf_aux_info(AT_HWCAP, &hwcaps, sizeof(hwcaps));
221 #endif
223 if (hwcaps & HWCAP_NEON)
225 return true;
226 }
227
228 #endif // end of 64-bit ARM
229
230 #endif
231
232 #else // architectures other than Intel/AMD and ARM
233
// Variant for architectures other than Intel/AMD and ARM: no detection is
// performed, `level` is set to 0 and the function returns true.
bool init_cpu_ext_level(int& level)
{
  const int no_extensions = 0;
  level = no_extensions;
  return true;
}
239
#endif // end of architecture selection (Intel/AMD, ARM, other)
241
242#elif defined(OJPH_ENABLE_WASM_SIMD) && defined(OJPH_EMSCRIPTEN)
243
// Variant compiled when OJPH_DISABLE_SIMD is defined together with
// OJPH_ENABLE_WASM_SIMD and OJPH_EMSCRIPTEN: no detection is performed,
// `level` is set to 1 and the function returns true.
bool init_cpu_ext_level(int& level)
{
  const int wasm_simd_level = 1;
  level = wasm_simd_level;
  return true;
}
249
250#else
251
// Variant compiled when SIMD is disabled and the WASM SIMD branch does not
// apply: `level` is set to 0 and the function returns true.
bool init_cpu_ext_level(int& level)
{
  const int simd_disabled = 0;
  level = simd_disabled;
  return true;
}
257
258#endif
259
261 static int cpu_level;
263
266 {
267 assert(cpu_level_initialized);
268 return cpu_level;
269 }
270
271}
bool init_cpu_ext_level(int &level)
@ ARM_CPU_EXT_LEVEL_SVE
Definition ojph_arch.h:163
@ ARM_CPU_EXT_LEVEL_SVE2
Definition ojph_arch.h:164
@ ARM_CPU_EXT_LEVEL_NEON
Definition ojph_arch.h:161
@ ARM_CPU_EXT_LEVEL_GENERIC
Definition ojph_arch.h:160
@ ARM_CPU_EXT_LEVEL_ASIMD
Definition ojph_arch.h:162
OJPH_EXPORT int get_cpu_ext_level()
static int cpu_level
static bool cpu_level_initialized
@ X86_CPU_EXT_LEVEL_AVX2
Definition ojph_arch.h:154
@ X86_CPU_EXT_LEVEL_AVX
Definition ojph_arch.h:153
@ X86_CPU_EXT_LEVEL_AVX512
Definition ojph_arch.h:156
@ X86_CPU_EXT_LEVEL_SSE2
Definition ojph_arch.h:148
@ X86_CPU_EXT_LEVEL_SSE41
Definition ojph_arch.h:151
@ X86_CPU_EXT_LEVEL_SSE
Definition ojph_arch.h:147
@ X86_CPU_EXT_LEVEL_MMX
Definition ojph_arch.h:146
@ X86_CPU_EXT_LEVEL_SSE42
Definition ojph_arch.h:152
@ X86_CPU_EXT_LEVEL_SSSE3
Definition ojph_arch.h:150
@ X86_CPU_EXT_LEVEL_SSE3
Definition ojph_arch.h:149
@ X86_CPU_EXT_LEVEL_AVX2FMA
Definition ojph_arch.h:155