diff options
Diffstat (limited to 'Source/3rdParty/SDL2/src/cpuinfo')
-rw-r--r-- | Source/3rdParty/SDL2/src/cpuinfo/SDL_cpuinfo.c | 105 | ||||
-rw-r--r-- | Source/3rdParty/SDL2/src/cpuinfo/SDL_simd.h | 88 |
2 files changed, 180 insertions, 13 deletions
diff --git a/Source/3rdParty/SDL2/src/cpuinfo/SDL_cpuinfo.c b/Source/3rdParty/SDL2/src/cpuinfo/SDL_cpuinfo.c index 4e2c0f1..4410358 100644 --- a/Source/3rdParty/SDL2/src/cpuinfo/SDL_cpuinfo.c +++ b/Source/3rdParty/SDL2/src/cpuinfo/SDL_cpuinfo.c @@ -22,6 +22,7 @@ #include "SDL_config.h" #else #include "../SDL_internal.h" +#include "SDL_simd.h" #endif #if defined(__WIN32__) @@ -38,6 +39,7 @@ /* CPU feature detection for SDL */ #include "SDL_cpuinfo.h" +#include "SDL_assert.h" #ifdef HAVE_SYSCONF #include <unistd.h> @@ -76,18 +78,19 @@ #endif #endif -#define CPU_HAS_RDTSC 0x00000001 -#define CPU_HAS_ALTIVEC 0x00000002 -#define CPU_HAS_MMX 0x00000004 -#define CPU_HAS_3DNOW 0x00000008 -#define CPU_HAS_SSE 0x00000010 -#define CPU_HAS_SSE2 0x00000020 -#define CPU_HAS_SSE3 0x00000040 -#define CPU_HAS_SSE41 0x00000100 -#define CPU_HAS_SSE42 0x00000200 -#define CPU_HAS_AVX 0x00000400 -#define CPU_HAS_AVX2 0x00000800 -#define CPU_HAS_NEON 0x00001000 +#define CPU_HAS_RDTSC (1 << 0) +#define CPU_HAS_ALTIVEC (1 << 1) +#define CPU_HAS_MMX (1 << 2) +#define CPU_HAS_3DNOW (1 << 3) +#define CPU_HAS_SSE (1 << 4) +#define CPU_HAS_SSE2 (1 << 5) +#define CPU_HAS_SSE3 (1 << 6) +#define CPU_HAS_SSE41 (1 << 7) +#define CPU_HAS_SSE42 (1 << 8) +#define CPU_HAS_AVX (1 << 9) +#define CPU_HAS_AVX2 (1 << 10) +#define CPU_HAS_NEON (1 << 11) +#define CPU_HAS_AVX512F (1 << 12) #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__ /* This is the brute force way of detecting instruction sets... @@ -246,6 +249,7 @@ done: static int CPU_CPUIDFeatures[4]; static int CPU_CPUIDMaxFunction = 0; static SDL_bool CPU_OSSavesYMM = SDL_FALSE; +static SDL_bool CPU_OSSavesZMM = SDL_FALSE; static void CPU_calcCPUIDFeatures(void) @@ -266,7 +270,7 @@ CPU_calcCPUIDFeatures(void) /* Check to make sure we can call xgetbv */ if (c & 0x08000000) { - /* Call xgetbv to see if YMM register state is saved */ + /* Call xgetbv to see if YMM (etc) register state is saved */ #if defined(__GNUC__) && (defined(i386) || defined(__x86_64__)) __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx"); #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */ @@ -280,6 +284,7 @@ CPU_calcCPUIDFeatures(void) } #endif CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE; + CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE; } } } @@ -400,6 +405,18 @@ CPU_haveAVX2(void) return 0; } +static int +CPU_haveAVX512F(void) +{ + if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) { + int a, b, c, d; + (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */ + cpuid(7, a, b, c, d); + return (b & 0x00010000); + } + return 0; +} + static int SDL_CPUCount = 0; int @@ -571,6 +588,7 @@ SDL_GetCPUCacheLineSize(void) } static Uint32 SDL_CPUFeatures = 0xFFFFFFFF; +static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF; static Uint32 SDL_GetCPUFeatures(void) @@ -578,41 +596,57 @@ SDL_GetCPUFeatures(void) if (SDL_CPUFeatures == 0xFFFFFFFF) { CPU_calcCPUIDFeatures(); SDL_CPUFeatures = 0; + SDL_SIMDAlignment = 4; /* a good safe base value */ if (CPU_haveRDTSC()) { SDL_CPUFeatures |= CPU_HAS_RDTSC; } if (CPU_haveAltiVec()) { SDL_CPUFeatures |= CPU_HAS_ALTIVEC; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } if (CPU_haveMMX()) { SDL_CPUFeatures |= CPU_HAS_MMX; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8); } if (CPU_have3DNow()) { SDL_CPUFeatures |= CPU_HAS_3DNOW; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8); } if (CPU_haveSSE()) { SDL_CPUFeatures |= CPU_HAS_SSE; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } if (CPU_haveSSE2()) { SDL_CPUFeatures |= CPU_HAS_SSE2; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } if (CPU_haveSSE3()) { SDL_CPUFeatures |= CPU_HAS_SSE3; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } if (CPU_haveSSE41()) { SDL_CPUFeatures |= CPU_HAS_SSE41; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } if (CPU_haveSSE42()) { SDL_CPUFeatures |= CPU_HAS_SSE42; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } if (CPU_haveAVX()) { SDL_CPUFeatures |= CPU_HAS_AVX; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32); } if (CPU_haveAVX2()) { SDL_CPUFeatures |= CPU_HAS_AVX2; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32); + } + if (CPU_haveAVX512F()) { + SDL_CPUFeatures |= CPU_HAS_AVX512F; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64); } if (CPU_haveNEON()) { SDL_CPUFeatures |= CPU_HAS_NEON; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } } return SDL_CPUFeatures; @@ -686,6 +720,12 @@ SDL_HasAVX2(void) } SDL_bool +SDL_HasAVX512F(void) +{ + return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F); +} + +SDL_bool SDL_HasNEON(void) { return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON); @@ -745,6 +785,44 @@ SDL_GetSystemRAM(void) } +size_t +SDL_SIMDGetAlignment(void) +{ + if (SDL_SIMDAlignment == 0xFFFFFFFF) { + SDL_GetCPUFeatures(); /* make sure this has been calculated */ + } + SDL_assert(SDL_SIMDAlignment != 0); + return SDL_SIMDAlignment; +} + +void * +SDL_SIMDAlloc(const size_t len) +{ + const size_t alignment = SDL_SIMDGetAlignment(); + const size_t padding = alignment - (len % alignment); + const size_t padded = (padding != alignment) ? (len + padding) : len; + Uint8 *retval = NULL; + Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *)); + if (ptr) { + /* store the actual malloc pointer right before our aligned pointer. */ + retval = ptr + sizeof (void *); + retval += alignment - (((size_t) retval) % alignment); + *(((void **) retval) - 1) = ptr; + } + return retval; +} + +void +SDL_SIMDFree(void *ptr) +{ + if (ptr) { + void **realptr = (void **) ptr; + realptr--; + SDL_free(*(((void **) ptr) - 1)); + } +} + + #ifdef TEST_MAIN #include <stdio.h> @@ -767,6 +845,7 @@ main() printf("SSE4.2: %d\n", SDL_HasSSE42()); printf("AVX: %d\n", SDL_HasAVX()); printf("AVX2: %d\n", SDL_HasAVX2()); + printf("AVX-512F: %d\n", SDL_HasAVX512F()); printf("NEON: %d\n", SDL_HasNEON()); printf("RAM: %d MB\n", SDL_GetSystemRAM()); return 0; diff --git a/Source/3rdParty/SDL2/src/cpuinfo/SDL_simd.h b/Source/3rdParty/SDL2/src/cpuinfo/SDL_simd.h new file mode 100644 index 0000000..e2b28bc --- /dev/null +++ b/Source/3rdParty/SDL2/src/cpuinfo/SDL_simd.h @@ -0,0 +1,88 @@ +/* + Simple DirectMedia Layer + Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org> + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ + +#include "SDL.h" +#include "../SDL_internal.h" + +/** + * \brief Report the alignment this system needs for SIMD allocations. + * + * This will return the minimum number of bytes to which a pointer must be + * aligned to be compatible with SIMD instructions on the current machine. + * For example, if the machine supports SSE only, it will return 16, but if + * it supports AVX-512F, it'll return 64 (etc). This only reports values for + * instruction sets SDL knows about, so if your SDL build doesn't have + * SDL_HasAVX512F(), then it might return 16 for the SSE support it sees and + * not 64 for the AVX-512 instructions that exist but SDL doesn't know about. + * Plan accordingly. + */ +extern size_t SDL_SIMDGetAlignment(void); + +/** + * \brief Allocate memory in a SIMD-friendly way. + * + * This will allocate a block of memory that is suitable for use with SIMD + * instructions. Specifically, it will be properly aligned and padded for + * the system's supported vector instructions. + * + * The memory returned will be padded such that it is safe to read or write + * an incomplete vector at the end of the memory block. This can be useful + * so you don't have to drop back to a scalar fallback at the end of your + * SIMD processing loop to deal with the final elements without overflowing + * the allocated buffer. + * + * You must free this memory with SDL_FreeSIMD(), not free() or SDL_free() + * or delete[], etc. + * + * Note that SDL will only deal with SIMD instruction sets it is aware of; + * for example, SDL 2.0.8 knows that SSE wants 16-byte vectors + * (SDL_HasSSE()), and AVX2 wants 32 bytes (SDL_HasAVX2()), but doesn't + * know that AVX-512 wants 64. To be clear: if you can't decide to use an + * instruction set with an SDL_Has*() function, don't use that instruction + * set with memory allocated through here. + * + * SDL_AllocSIMD(0) will return a non-NULL pointer, assuming the system isn't + * out of memory. + * + * \param len The length, in bytes, of the block to allocated. The actual + * allocated block might be larger due to padding, etc. + * \return Pointer to newly-allocated block, NULL if out of memory. + * + * \sa SDL_SIMDAlignment + * \sa SDL_SIMDFree + */ +extern void * SDL_SIMDAlloc(const size_t len); + +/** + * \brief Deallocate memory obtained from SDL_SIMDAlloc + * + * It is not valid to use this function on a pointer from anything but + * SDL_SIMDAlloc(). It can't be used on pointers from malloc, realloc, + * SDL_malloc, memalign, new[], etc. + * + * However, SDL_SIMDFree(NULL) is a legal no-op. + * + * \sa SDL_SIMDAlloc + */ +extern void SDL_SIMDFree(void *ptr); + +/* vi: set ts=4 sw=4 expandtab: */ + |