OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_arch.h
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8// Copyright (c) 2026, Osamu Watanabe
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32//***************************************************************************/
33// This file is part of the OpenJPH software implementation.
34// File: ojph_arch.h
35// Author: Aous Naman
36// Date: 28 August 2019
37//***************************************************************************/
38
39
40#ifndef OJPH_ARCH_H
41#define OJPH_ARCH_H
42
43#include <cstdio>
44#include <cstdint>
45#include <cmath>
46
47#include "ojph_defs.h"
48
49
51// preprocessor directives for compiler
// Exactly one of OJPH_COMPILER_MSVC / OJPH_COMPILER_GNUC is defined when
// _MSC_VER or __GNUC__ is set; _MSC_VER is tested first, so it wins if both
// are present. Neither macro is defined for any other compiler.
53#ifdef _MSC_VER
54#define OJPH_COMPILER_MSVC
55#elif (defined __GNUC__)
56#define OJPH_COMPILER_GNUC
57#endif
58
// OJPH_EMSCRIPTEN is set independently of the compiler macros above whenever
// __EMSCRIPTEN__ is defined.
59#ifdef __EMSCRIPTEN__
60#define OJPH_EMSCRIPTEN
61#endif
62
63#ifdef OJPH_COMPILER_MSVC
64#include <intrin.h>
65#endif
66
68 // portable force-inline / no-inline function qualifiers
// Both macros expand to a storage-class prefix that includes `static`, so a
// function declared with them has internal linkage.
// Only two spellings exist: the MSVC one, and a GNU-attribute one used for
// every compiler for which OJPH_COMPILER_MSVC is not defined.
70#ifdef OJPH_COMPILER_MSVC
71 #define OJPH_FORCE_INLINE static __forceinline
72 #define OJPH_NO_INLINE static __declspec(noinline)
73#else
74 #define OJPH_FORCE_INLINE static inline __attribute__((always_inline))
75 #define OJPH_NO_INLINE static __attribute__((noinline))
76#endif
77
79// preprocessor directives for architecture
// Defines exactly one OJPH_ARCH_* macro. The tests form a single #if/#elif
// chain, so the first matching group wins: ARM (32- and 64-bit share
// OJPH_ARCH_ARM), then 32-bit x86, then x86-64, then IA-64, then PowerPC
// (split into OJPH_ARCH_PPC64 / OJPH_ARCH_PPC). Anything else becomes
// OJPH_ARCH_UNKNOWN.
81#if defined(__arm__) || defined(__TARGET_ARCH_ARM) \
82 || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
83 #define OJPH_ARCH_ARM
84#elif defined(__i386) || defined(__i386__) || defined(_M_IX86)
85 #define OJPH_ARCH_I386
86#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) \
87 || defined(_M_X64)
88 #define OJPH_ARCH_X86_64
89#elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
90 #define OJPH_ARCH_IA64
91#elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) \
92 || defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC) \
93 || defined(_M_MPPC) || defined(_M_PPC)
// 64-bit PowerPC is distinguished from 32-bit by a second set of macros.
94 #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__)
95 #define OJPH_ARCH_PPC64
96 #else
97 #define OJPH_ARCH_PPC
98 #endif
99#else
100 #define OJPH_ARCH_UNKNOWN
101#endif
102
103namespace ojph {
105 // disable SIMD for every architecture other than x86-64, i386 and ARM
// (this covers IA-64 and PowerPC as well as OJPH_ARCH_UNKNOWN); the
// !defined(OJPH_DISABLE_SIMD) term only avoids redefining the macro when the
// build already set it.
107#if !defined(OJPH_ARCH_X86_64) && !defined(OJPH_ARCH_I386) && \
108 !defined(OJPH_ARCH_ARM) && !defined(OJPH_DISABLE_SIMD)
109#define OJPH_DISABLE_SIMD
110#endif // not x86-64 / i386 / ARM, and OJPH_DISABLE_SIMD not already defined
111
113 // OS detection definitions
// At most one OJPH_OS_* macro is defined; the chain is evaluated top to
// bottom and stops at the first match. __ANDROID__ is tested before __linux,
// so a target that defines both is reported as OJPH_OS_ANDROID. No macro is
// defined when none of the tests match.
115#if (defined WIN32) || (defined _WIN32) || (defined _WIN64)
116#define OJPH_OS_WINDOWS
117#elif (defined __APPLE__)
118#define OJPH_OS_APPLE
119#elif (defined __ANDROID__)
120#define OJPH_OS_ANDROID
121#elif (defined __linux)
122#define OJPH_OS_LINUX
123#elif (defined __FreeBSD__)
124#define OJPH_OS_FREEBSD
125#elif (defined __OpenBSD__)
126#define OJPH_OS_OPENBSD
127#endif
128
130 // defines for dll
// OJPH_EXPORT expands to __declspec(dllexport) only for a Windows build in
// which OJPH_BUILD_SHARED_LIBRARY is defined; in every other configuration it
// expands to nothing.
132#if defined(OJPH_OS_WINDOWS) && defined(OJPH_BUILD_SHARED_LIBRARY)
133#define OJPH_EXPORT __declspec(dllexport)
134#else
135#define OJPH_EXPORT
136#endif
137
139 // cpu features
// Declaration only; the definition lives outside this header.
// NOTE(review): presumably the returned int is one of the
// X86_CPU_EXT_LEVEL_* / ARM_CPU_EXT_LEVEL_* constants declared below --
// confirm against the implementation.
142 int get_cpu_ext_level();
143
// Unnamed int-based enumeration. The enumerator lines (listing lines
// 145-156) are not shown in this view; the symbol index of this listing
// places the X86_CPU_EXT_LEVEL_* constants (GENERIC through AVX512) there.
144 enum : int {
157 };
158
// Unnamed int-based enumeration. The enumerator lines (listing lines
// 160-164) are not shown in this view; the symbol index of this listing
// places the ARM_CPU_EXT_LEVEL_* constants (GENERIC, NEON, ASIMD, SVE, SVE2)
// there.
159 enum : int {
165 };
166
168 static inline ui32 population_count(ui32 val)
169 {
170 #if defined(OJPH_COMPILER_MSVC) \
171 && (defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
172 return (ui32)__popcnt(val);
173 #elif (defined OJPH_COMPILER_GNUC)
174 return (ui32)__builtin_popcount(val);
175 #else
176 val -= ((val >> 1) & 0x55555555);
177 val = (((val >> 2) & 0x33333333) + (val & 0x33333333));
178 val = (((val >> 4) + val) & 0x0f0f0f0f);
179 val += (val >> 8);
180 val += (val >> 16);
181 return (int)(val & 0x0000003f);
182 #endif
183 }
184
186#ifdef OJPH_COMPILER_MSVC
187 #pragma intrinsic(_BitScanReverse)
188#endif
189 static inline ui32 count_leading_zeros(ui32 val)
190 {
191 #ifdef OJPH_COMPILER_MSVC
192 unsigned long result = 0;
193 _BitScanReverse(&result, val);
194 return 31 ^ (ui32)result;
195 #elif (defined OJPH_COMPILER_GNUC)
196 return (ui32)__builtin_clz(val);
197 #else
198 val |= (val >> 1);
199 val |= (val >> 2);
200 val |= (val >> 4);
201 val |= (val >> 8);
202 val |= (val >> 16);
203 return 32 - population_count(val);
204 #endif
205 }
206
208#ifdef OJPH_COMPILER_MSVC
209 #if (defined OJPH_ARCH_X86_64 || defined OJPH_ARCH_ARM)
210 #pragma intrinsic(_BitScanReverse64)
211 #elif (defined OJPH_ARCH_I386)
212 #pragma intrinsic(_BitScanReverse)
213 #else
214 #error Error unsupport MSVC version
215 #endif
216#endif
217 static inline ui32 count_leading_zeros(ui64 val)
218 {
219 #ifdef OJPH_COMPILER_MSVC
220 unsigned long result = 0;
221 #if (defined OJPH_ARCH_X86_64) || (defined OJPH_ARCH_ARM)
222 _BitScanReverse64(&result, val);
223 #elif (defined OJPH_ARCH_I386)
224 ui32 msb = (ui32)(val >> 32), lsb = (ui32)val;
225 if (msb == 0)
226 _BitScanReverse(&result, lsb);
227 else {
228 _BitScanReverse(&result, msb);
229 result += 32;
230 }
231 #else
232 #error Error unsupport MSVC version
233 #endif
234 return 63 ^ (ui32)result;
235 #elif (defined OJPH_COMPILER_GNUC)
236 return (ui32)__builtin_clzll(val);
237 #else
238 val |= (val >> 1);
239 val |= (val >> 2);
240 val |= (val >> 4);
241 val |= (val >> 8);
242 val |= (val >> 16);
243 val |= (val >> 32);
244 return 64 - population_count64(val);
245 #endif
246 }
247
249#ifdef OJPH_COMPILER_MSVC
250 #pragma intrinsic(_BitScanForward)
251#endif
252 static inline ui32 count_trailing_zeros(ui32 val)
253 {
254 #ifdef OJPH_COMPILER_MSVC
255 unsigned long result = 0;
256 _BitScanForward(&result, val);
257 return (ui32)result;
258 #elif (defined OJPH_COMPILER_GNUC)
259 return (ui32)__builtin_ctz(val);
260 #else
261 val |= (val << 1);
262 val |= (val << 2);
263 val |= (val << 4);
264 val |= (val << 8);
265 val |= (val << 16);
266 return 32 - population_count(val);
267 #endif
268 }
269
271#ifdef OJPH_COMPILER_MSVC
272 #pragma intrinsic(_BitScanForward64)
273#endif
274 static inline ui32 count_trailing_zeros(ui64 val)
275 {
276 #ifdef OJPH_COMPILER_MSVC
277 unsigned long result = 0;
278 #if (defined OJPH_ARCH_X86_64) || (defined OJPH_ARCH_ARM)
279 _BitScanForward64(&result, val);
280 #elif (defined OJPH_ARCH_I386)
281 ui32 lsb = (ui32)val, msb = (ui32)(val >> 32);
282 if (lsb != 0)
283 _BitScanForward(&result, lsb);
284 else {
285 _BitScanForward(&result, msb);
286 result += 32;
287 }
288 #endif
289 return (ui32)result;
290 #elif (defined OJPH_COMPILER_GNUC)
291 return (ui32)__builtin_ctzll(val);
292 #else
293 if ((ui32)val != 0)
294 return count_trailing_zeros((ui32)val);
295 return 32 + count_trailing_zeros((ui32)(val >> 32));
296 #endif
297 }
298
300 static inline si32 ojph_round(float val)
301 {
302 #ifdef OJPH_COMPILER_MSVC
303 return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
304 #elif (defined OJPH_COMPILER_GNUC)
305 return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
306 #else
307 return (si32)round(val);
308 #endif
309 }
310
312 static inline si32 ojph_trunc(float val)
313 {
314 #ifdef OJPH_COMPILER_MSVC
315 return (si32)(val);
316 #elif (defined OJPH_COMPILER_GNUC)
317 return (si32)(val);
318 #else
319 return (si32)trunc(val);
320 #endif
321 }
322
324 // constants
// Alignment constants, selected by whether OJPH_EMSCRIPTEN is defined:
// byte_alignment is 64 for regular builds and 16 for Emscripten builds.
// NOTE(review): listing lines 328, 329 and 332 are not shown in this view;
// the symbol index of this listing places log_byte_alignment at line 328 and
// object_alignment at line 329, so the non-Emscripten branch declares those
// as well -- check the original header for their values.
326 #ifndef OJPH_EMSCRIPTEN
327 const ui32 byte_alignment = 64; // 64 bytes == 512 bits
330 #else
331 const ui32 byte_alignment = 16; // 16 bytes == 128 bits
333 const ui32 object_alignment = 8;
334 #endif
335
337 // templates for alignment
339
341 // finds the size such that it is a multiple of byte_alignment
342 template <typename T, ui32 N>
343 size_t calc_aligned_size(size_t size) {
344 size = size * sizeof(T) + N - 1;
345 size &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
346 size >>= (63 - count_leading_zeros((ui64)sizeof(T)));
347 return size;
348 }
349
351 // moves the pointer to first address that is a multiple of byte_alignment
352 template <typename T, ui32 N>
353 inline T *align_ptr(T *ptr) {
354 intptr_t p = reinterpret_cast<intptr_t>(ptr);
355 p += N - 1;
356 p &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
357 return reinterpret_cast<T *>(p);
358 }
359
360}
361
362#endif // !OJPH_ARCH_H
const ui32 object_alignment
Definition ojph_arch.h:329
@ ARM_CPU_EXT_LEVEL_SVE
Definition ojph_arch.h:163
@ ARM_CPU_EXT_LEVEL_SVE2
Definition ojph_arch.h:164
@ ARM_CPU_EXT_LEVEL_NEON
Definition ojph_arch.h:161
@ ARM_CPU_EXT_LEVEL_GENERIC
Definition ojph_arch.h:160
@ ARM_CPU_EXT_LEVEL_ASIMD
Definition ojph_arch.h:162
const ui32 byte_alignment
Definition ojph_arch.h:327
uint64_t ui64
Definition ojph_defs.h:56
static si32 ojph_round(float val)
Definition ojph_arch.h:300
size_t calc_aligned_size(size_t size)
Definition ojph_arch.h:343
T * align_ptr(T *ptr)
Definition ojph_arch.h:353
static ui32 population_count(ui32 val)
Definition ojph_arch.h:168
OJPH_EXPORT int get_cpu_ext_level()
static si32 ojph_trunc(float val)
Definition ojph_arch.h:312
static ui32 count_trailing_zeros(ui32 val)
Definition ojph_arch.h:252
static ui32 count_leading_zeros(ui32 val)
Definition ojph_arch.h:189
int32_t si32
Definition ojph_defs.h:55
const ui32 log_byte_alignment
Definition ojph_arch.h:328
uint32_t ui32
Definition ojph_defs.h:54
@ X86_CPU_EXT_LEVEL_AVX2
Definition ojph_arch.h:154
@ X86_CPU_EXT_LEVEL_AVX
Definition ojph_arch.h:153
@ X86_CPU_EXT_LEVEL_AVX512
Definition ojph_arch.h:156
@ X86_CPU_EXT_LEVEL_GENERIC
Definition ojph_arch.h:145
@ X86_CPU_EXT_LEVEL_SSE2
Definition ojph_arch.h:148
@ X86_CPU_EXT_LEVEL_SSE41
Definition ojph_arch.h:151
@ X86_CPU_EXT_LEVEL_SSE
Definition ojph_arch.h:147
@ X86_CPU_EXT_LEVEL_MMX
Definition ojph_arch.h:146
@ X86_CPU_EXT_LEVEL_SSE42
Definition ojph_arch.h:152
@ X86_CPU_EXT_LEVEL_SSSE3
Definition ojph_arch.h:150
@ X86_CPU_EXT_LEVEL_SSE3
Definition ojph_arch.h:149
@ X86_CPU_EXT_LEVEL_AVX2FMA
Definition ojph_arch.h:155
#define OJPH_EXPORT
Definition ojph_arch.h:135