aboutsummaryrefslogtreecommitdiff
path: root/src/libs/tiny/tinysound.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/libs/tiny/tinysound.h')
-rw-r--r--src/libs/tiny/tinysound.h2560
1 files changed, 0 insertions, 2560 deletions
diff --git a/src/libs/tiny/tinysound.h b/src/libs/tiny/tinysound.h
deleted file mode 100644
index 41d547d..0000000
--- a/src/libs/tiny/tinysound.h
+++ /dev/null
@@ -1,2560 +0,0 @@
-/*
-tinysound.h - v1.07
-
-Summary:
-tinysound is a C API for loading, playing, looping, panning and fading mono
-and stereo sounds. This means tinysound imparts no external DLLs or large
-libraries that adversely affect shipping size. tinysound can also run on
-Windows XP since DirectSound ships with all recent versions of Windows.
-tinysound implements a custom SSE2 mixer by explicitly locking and unlocking
-portions of an internal buffer. tinysound uses CoreAudio for Apple machines (like
-OSX and iOS). SDL is used for all other platforms. Define TS_FORCE_SDL
-before placing the TS_IMPLEMENTATION in order to force the use of SDL.
-
-Revision history:
-1.0 (06/04/2016) initial release
-1.01 (06/06/2016) load WAV from memory
-separate portable and OS-specific code in tsMix
-fixed bug causing audio glitches when sounds ended
-added stb_vorbis loaders + demo example
-1.02 (06/08/2016) error checking + strings in vorbis loaders
-SSE2 implementation of mixer
-fix typos on docs/comments
-corrected volume bug introduced in 1.01
-1.03 (07/05/2016) size calculation helper (to know size of sound in
-bytes on the heap) tsSoundSize
-1.04 (12/06/2016) merged in Aaron Balint's contributions
-SFFT and pitch functions from Stephan M. Bernsee
-tsMix can run on its own thread with tsSpawnMixThread
-updated documentation, typo fixes
-fixed typo in malloc16 that caused heap corruption
-1.05 (12/08/2016) tsStopAllSounds, suggested by Aaron Balint
-1.06 (02/17/2017) port to CoreAudio for Apple machines
-1.07 (06/18/2017) SIMD the pitch shift code; swapped out old Bernsee
-code for a new re-write, updated docs as necessary,
-support for compiling as .c and .cpp on Windows,
-port for SDL (for Linux, or any other platform).
-Special thanks to DexP of github for 90% of the work
-on the SDL port!
-*/
-
-/*
-Contributors:
-Aaron Balint 1.04 - real time pitch
-1.04 - separate thread for tsMix
-1.04 - bugfix, removed extra free16 call for second channel
-DeXP 1.07 - initial work on SDL port
-*/
-
-/*
-To create implementation (the function definitions)
-#define TS_IMPLEMENTATION
-in *one* C/CPP file (translation unit) that includes this file
-
-DOCUMENTATION (very quick intro):
-1. create context
-2. load sounds from disk into memory
-3. play sounds
-4. free context
-
-1. tsContext* ctx = tsMakeContext( hwnd, frequency, latency, seconds, N );
-2. tsLoadedSound loaded = tsLoadWAV( "path_to_file/filename.wav" );
-   tsPlaySoundDef def = tsMakeDef( &loaded );
-3. tsPlaySound( ctx, def );
-4. tsShutdownContext( ctx );
-
-DOCUMENTATION (longer introduction):
-tinysound consists of tsLoadedSounds, tsPlayingSounds and the tsContext.
-The tsContext encapsulates an OS sound API, as well as buffers + settings.
-tsLoadedSound holds raw samples of a sound. tsPlayingSound is an instance
-of a tsLoadedSound that represents a sound that can be played through the
-tsContext.
-
-There are two main versions of the API, the low-level and the high-level
-API. The low-level API does not manage any memory for tsPlayingSounds. The
-high level api holds a memory pool of playing sounds.
-
-High-level API:
-First create a context and pass in non-zero to the final parameter. This
-final parameter controls how large of a memory pool to use for tsPlayingSounds.
-Here's an example where N is the size of the internal pool:
-
-tsContext* ctx = tsMakeContext( hwnd, frequency, latency, seconds, N );
-
-We create tsPlayingSounds indirectly with tsPlayDef structs. tsPlayDef is a
-POD struct so feel free to make them straight on the stack. The tsPlayDef
-sets up initialization parameters. Here's an example to load a wav and
-play it:
-
-tsLoadedSound loaded = tsLoadWAV( "path_to_file/filename.wav" );
-tsPlaySoundDef def = tsMakeDef( &loaded );
-tsPlayingSound* sound = tsPlaySound( ctx, def );
-
-The same def can be used to play as many sounds as desired (even simultaneously)
-as long as the context playing sound pool is large enough.
-
-Low-level API:
-First create a context and pass 0 in the final parameter (0 here means
-the context will *not* allocate a tsPlayingSound memory pool):
-
-tsContext* ctx = tsMakeContext( hwnd, frequency, latency, seconds, 0 );
-
-parameters:
-hwnd -- HWND, handle to window (on OSX just pass in 0)
-frequency -- int, represents Hz frequency rate in which samples are played
-latency -- int, estimated latency in Hz from PlaySound call to speaker output
-seconds -- int, number of seconds of samples the internal buffers can hold
-0 (last param) -- int, number of elements in tsPlayingSound pool
-
-We create a tsPlayingSound like so:
-tsLoadedSound loaded = tsLoadWAV( "path_to_file/filename.wav" );
-tsPlayingSound playing_sound = tsMakePlayingSound( &loaded );
-
-Then to play the sound we do:
-tsInsertSound( ctx, &playing_sound );
-
-The above tsInsertSound function call will place playing_sound into
-a singly-linked list inside the context. The context will remove
-the sound from its internal list when it finishes playing.
-
-WARNING: The high-level API cannot be mixed with the low-level API. If you
-try then the internal code will assert and crash. Pick one and stick with it.
-Usually the high-level API will be used, but if someone is *really* picky about
-their memory usage, or wants more control, the low-level API can be used.
-
-Here is the Low-Level API:
-tsPlayingSound tsMakePlayingSound( tsLoadedSound* loaded );
-void tsInsertSound( tsContext* ctx, tsPlayingSound* sound );
-
-Here is the High-Level API:
-tsPlayingSound* tsPlaySound( tsContext* ctx, tsPlaySoundDef def );
-tsPlaySoundDef tsMakeDef( tsLoadedSound* sound );
-void tsStopAllSounds( tsContext* ctx );
-
-Be sure to link against dsound.dll (or dsound.lib) on Windows.
-
-Read the rest of the header for specific details on all available functions
-and struct types.
-*/
-
-/*
-Known Limitations:
-
-* PCM mono/stereo is the only format the tsLoadWAV function supports. I don't
-guarantee it will work for all kinds of wav files, but it certainly does for the common
-kind (and can be changed fairly easily if someone wanted to extend it).
-* Only supports 16 bits per sample.
-* Mixer does not do any fancy clipping. The algorithm is to convert all 16 bit samples
-to float, mix all samples, and write back to audio API as 16 bit integers. In
-practice this works very well and clipping is not often a big problem.
-* I'm not super familiar with good ways to avoid the DirectSound play cursor from going
-past the write cursor. To mitigate this pass in a larger number to tsMakeContext's 4th
-parameter (buffer scale in seconds).
-* Pitch shifting code is pretty darn expensive. This is due to the use of a Fast Fourier Transform
-routine. The pitch shifting itself is written in rather efficient SIMD using SSE2 intrinsics,
-but the FFT routine is very basic. FFT is a big bottleneck for pitch shifting. There is a
-TODO optimization listed in this file for the FFT routine, but it's fairly low priority;
-optimizing FFT routines is difficult and requires a lot of specialized knowledge.
-*/
-
-/*
-FAQ
-Q : Why DirectSound instead of (insert API here) on Windows?
-A : Casey Muratori documented DS on Handmade Hero, other APIs do not have such good docs. DS has
-shipped on Windows XP all the way through Windows 10 -- using this header effectively intro-
-duces zero dependencies for the foreseeable future. The DS API itself is sane enough to quickly
-implement needed features, and users won't hear the difference between various APIs. Latency is
-not that great with DS but it is shippable. Additionally, many other APIs will in the end speak
-to Windows through the DS API.
-
-Q : Why not include Linux support?
-A : There have been a couple requests for ALSA support on Linux. For now the only option is to use
-SDL backend, which can indirectly support ALSA. SDL is used only in a very low-level manner;
-to get sound samples to the sound card via callback, so there shouldn't be much in the way of
-considering SDL a good option for "name your flavor" of Linux backend.
-
-Q : I would like to use my own memory management, how can I achieve this?
-A : This header makes a couple uses of malloc/free, and malloc16/free16. Simply find these bits
-and replace them with your own memory allocation routines. They can be wrapped up into a macro,
-or call your own functions directly -- it's up to you. Generally these functions allocate fairly
-large chunks of memory, and not very often (if at all), with one exception: tsSetPitch is a very
-expensive routine and requires frequent dynamic memory management.
-*/
-
-/*
-Some past discussion threads:
-https://www.reddit.com/r/gamedev/comments/6i39j2/tinysound_the_cutest_library_to_get_audio_into/
-https://www.reddit.com/r/gamedev/comments/4ml6l9/tinysound_singlefile_c_audio_library/
-https://forums.tigsource.com/index.php?topic=58706.0
-*/
-
-#if !defined( TINYSOUND_H )
-
-#define TS_WINDOWS 1
-#define TS_MAC 2
-#define TS_UNIX 3
-#define TS_SDL 4
-
-#if defined( _WIN32 )
-#define TS_PLATFORM TS_WINDOWS
-#elif defined( __APPLE__ )
-#define TS_PLATFORM TS_MAC
-#else
-#define TS_PLATFORM TS_SDL
-
-// please note TS_UNIX is not directly supported
-// instead, unix-style OSes are encouraged to use SDL
-// see: https://www.libsdl.org/
-
-#endif
-
-// Use TS_FORCE_SDL to override the above macros and use
-// the SDL port.
-#ifdef TS_FORCE_SDL
-
-#undef TS_PLATFORM
-#define TS_PLATFORM TS_SDL
-
-#endif
-
-#include <stdint.h>
-
-// read this in the event of tsLoadWAV/tsLoadOGG errors
-// also read this in the event of certain errors from tsMakeContext
-extern const char* g_tsErrorReason;
-
-// stores a loaded sound in memory
-// Filled in by tsReadMemWAV / tsReadMemOGG and released with tsFreeSound.
-typedef struct
-{
- int sample_count; // number of samples per channel
- int channel_count; // 1 (mono) or 2 (stereo); the loaders reject anything else
- void* channels[2]; // per-channel sample data stored as floats packed in __m128; channels[1] is 0 for mono
-} tsLoadedSound;
-
-struct tsPitchData;
-typedef struct tsPitchData tsPitchData;
-
-// represents an instance of a tsLoadedSound, can be played through the tsContext
-typedef struct tsPlayingSound
-{
- int active; // read by tsIsActive; cleared by tsStopSound
- int paused; // set by tsPauseSound
- int looped; // set by tsLoopSound (zero means no looping)
- float volume0; // left volume, set by tsSetVolume (clamped to >= 0)
- float volume1; // right volume, set by tsSetVolume (clamped to >= 0)
- float pan0; // left pan weight, tsSetPan stores 1 - pan here
- float pan1; // right pan weight, tsSetPan stores pan here
- float pitch; // set by tsSetPitch; tsMakePlayingSound starts it at 1.0f
- tsPitchData* pitch_filter[2]; // presumably one pitch-shift state per channel; starts as null
- int sample_index; // starts at 0 in tsMakePlayingSound; presumably the playback cursor -- confirm in tsMix
- tsLoadedSound* loaded_sound; // the sample data this instance plays
- struct tsPlayingSound* next; // link for the context's singly-linked lists
-} tsPlayingSound;
-
-// holds audio API info and other info
-struct tsContext;
-typedef struct tsContext tsContext;
-
-// The returned struct will contain a null pointer in tsLoadedSound::channels[ 0 ]
-// in the case of errors. Read g_tsErrorReason string for details on what happened.
-// Calls tsReadMemWAV internally.
-tsLoadedSound tsLoadWAV(const char* path);
-
-// Reads a WAV file from memory. Still allocates memory for the tsLoadedSound since
-// WAV format will interlace stereo, and we need separate data streams to do SIMD
-// properly.
-void tsReadMemWAV(const void* memory, tsLoadedSound* sound);
-
-// If stb_vorbis was included *before* tinysound go ahead and create
-// some functions for dealing with OGG files.
-#ifdef STB_VORBIS_INCLUDE_STB_VORBIS_H
-void tsReadMemOGG(const void* memory, int length, int* sample_rate, tsLoadedSound* sound);
-tsLoadedSound tsLoadOGG(const char* path, int* sample_rate);
-#endif
-
-// Uses free16 (aligned free, implemented later in this file) to free up both of
-// the channels stored within sound
-void tsFreeSound(tsLoadedSound* sound);
-
-// Returns the size, in bytes, of all heap-allocated memory for this particular
-// loaded sound
-int tsSoundSize(tsLoadedSound* sound);
-
-// playing_pool_count -- 0 to setup low-level API, non-zero to size the internal
-// memory pool for tsPlayingSound instances
-tsContext* tsMakeContext(void* hwnd, unsigned play_frequency_in_Hz, int latency_factor_in_Hz, int num_buffered_seconds, int playing_pool_count);
-void tsShutdownContext(tsContext* ctx);
-
-// Call tsSpawnMixThread once to setup a separate thread for the context to run
-// upon. The separate thread will continually call tsMix and perform mixing
-// operations.
-void tsSpawnMixThread(tsContext* ctx);
-
-// Use tsThreadSleepDelay to specify a custom sleep delay time.
-// A sleep will occur after each call to tsMix. By default YieldProcessor
-// is used, and no sleep occurs. Use a sleep delay to conserve CPU bandwidth.
-// A recommended sleep time is a little less than 1/2 your predicted 1/FPS.
-// 60 fps is 16 ms, so about 1-5 should work well in most cases.
-void tsThreadSleepDelay(tsContext* ctx, int milliseconds);
-
-// Call this manually, once per game tick recommended, if you haven't ever
-// called tsSpawnMixThread. Otherwise the thread will call tsMix itself.
-// NOTE: the remarks below describe a num_samples_to_write value, but tsMix
-// as declared here takes only the context and has no such parameter.
-// num_samples_to_write is not used on Windows. On Mac it is used to push
-// samples into a circular buffer while CoreAudio simultaneously pulls samples
-// off of the buffer. num_samples_to_write should be computed each update tick
-// as delta_time * play_frequency_in_Hz + 1.
-void tsMix(tsContext* ctx);
-
-// All of the functions in this next section should only be called if tsIsActive
-// returns true. Calling them otherwise probably won't do anything bad, but it
-// won't do anything at all. If a sound is active it resides in the context's
-// internal list of playing sounds.
-int tsIsActive(tsPlayingSound* sound);
-
-// Flags sound for removal. Upon next tsMix call will remove sound from playing
-// list. If high-level API used sound is placed onto the internal free list.
-void tsStopSound(tsPlayingSound* sound);
-
-void tsLoopSound(tsPlayingSound* sound, int zero_for_no_loop);
-void tsPauseSound(tsPlayingSound* sound, int one_for_paused);
-
-// lerp from 0 to 1, 0 full left, 1 full right
-void tsSetPan(tsPlayingSound* sound, float pan);
-
-// explicitly set volume of each channel. Can be used as panning (but it's
-// recommended to use the tsSetPan function for panning).
-void tsSetVolume(tsPlayingSound* sound, float volume_left, float volume_right);
-
-// Change pitch (not duration) of sound. pitch = 0.5f for one octave lower, pitch = 2.0f for one octave higher.
-// pitch at 1.0f applies no change. pitch settings farther away from 1.0f create more distortion and lower
-// the output sample quality. pitch can be adjusted in real-time for doppler effects and the like. Going beyond
-// 0.5f and 2.0f may require some tweaking the pitch shifting parameters, and is not recommended.
-
-// Additional important information about performance: This function
-// is quite expensive -- you have been warned! Try it out and be aware of how much CPU consumption it uses.
-// To avoid destroying the originally loaded sound samples, tsSetPitch will do a one-time allocation to copy
-// sound samples into a new buffer. The new buffer contains the pitch adjusted samples, and these will be played
-// through tsMix. This lets the pitch be modulated at run-time, but requires dynamically allocated memory. The
-// memory is freed once the sound finishes playing. If a one-time pitch adjustment is desired, for performance
-// reasons please consider doing an off-line pitch adjustment manually as a pre-processing step for your sounds.
-// Also, consider changing malloc16 and free16 to match your custom memory allocation needs. Try adjusting
-// TS_PITCH_QUALITY (must be a power of two) and see how this affects your performance.
-void tsSetPitch(tsPlayingSound* sound, float pitch);
-
-// Delays sound before actually playing it. Requires context to be passed in
-// since there's a conversion from seconds to samples per second.
-// If one were so inclined another version could be implemented like:
-// void tsSetDelay( tsPlayingSound* sound, float delay, int samples_per_second )
-void tsSetDelay(tsContext* ctx, tsPlayingSound* sound, float delay_in_seconds);
-
-// Portable sleep function
-void tsSleep(int milliseconds);
-
-// LOW-LEVEL API
-tsPlayingSound tsMakePlayingSound(tsLoadedSound* loaded);
-void tsInsertSound(tsContext* ctx, tsPlayingSound* sound);
-
-// HIGH-LEVEL API
-// Plain-data description of how a sound should start playing; passed by value
-// to tsPlaySound. The field names mirror the tsPauseSound / tsLoopSound /
-// tsSetVolume / tsSetPan / tsSetPitch / tsSetDelay setters -- presumably each
-// is applied through the matching setter (confirm in tsPlaySound).
-typedef struct
-{
- int paused;
- int looped;
- float volume_left;
- float volume_right;
- float pan;
- float pitch;
- float delay; // presumably seconds, matching tsSetDelay's delay_in_seconds -- confirm
- tsLoadedSound* loaded; // sound data to play
-} tsPlaySoundDef;
-
-tsPlayingSound* tsPlaySound(tsContext* ctx, tsPlaySoundDef def);
-tsPlaySoundDef tsMakeDef(tsLoadedSound* sound);
-void tsStopAllSounds(tsContext* ctx);
-
-#define TINYSOUND_H
-#endif
-
-#ifdef TS_IMPLEMENTATION
-
-#define _CRT_SECURE_NO_WARNINGS FUCK_YOU
-#include <stdlib.h> // malloc, free
-#include <stdio.h> // fopen, fclose
-#include <string.h> // memcmp, memset, memcpy
-#include <xmmintrin.h>
-#include <emmintrin.h>
-
-#if TS_PLATFORM == TS_WINDOWS
-
-#include <dsound.h>
-#undef PlaySound
-
-#if defined( _MSC_VER )
-#pragma comment( lib, "dsound.lib" )
-#endif
-
-#elif TS_PLATFORM == TS_MAC
-
-#include <CoreAudio/CoreAudio.h>
-#include <AudioUnit/AudioUnit.h>
-#include <pthread.h>
-#include <mach/mach_time.h>
-
-#else
-
-#include "SDL2/SDL.h"
-
-#endif
-
-// TS_CHECK: when X is false, store message Y in g_tsErrorReason and jump to the
-// enclosing function's `ts_err` label (every user must define that label).
-#define TS_CHECK( X, Y ) do { if ( !(X) ) { g_tsErrorReason = Y; goto ts_err; } } while ( 0 )
-// TS_ASSERT_INTERNAL: how a failed TS_ASSERT stops the program -- a compiler trap
-// on Mac/clang, otherwise a deliberate write through a null pointer.
-#if TS_PLATFORM == TS_MAC && defined( __clang__ )
-#define TS_ASSERT_INTERNAL __builtin_trap( )
-#else
-#define TS_ASSERT_INTERNAL *(int*)0 = 0
-#endif
-#define TS_ASSERT( X ) do { if ( !(X) ) TS_ASSERT_INTERNAL; } while ( 0 )
-// TS_ALIGN rounds X up, and TS_TRUNC rounds X down, to a multiple of Y (Y must be
-// a power of two). NOTE: TS_ALIGN does not parenthesize X, so the size_t cast
-// binds only to the first operand of an expression argument.
-#define TS_ALIGN( X, Y ) ((((size_t)X) + ((Y) - 1)) & ~((Y) - 1))
-#define TS_TRUNC( X, Y ) ((size_t)(X) & ~((Y) - 1))
-
-const char* g_tsErrorReason;
-
-// Reads the whole file at `path` into a malloc'd buffer.
-// Returns the buffer (caller frees with free) or 0 on any failure: file cannot
-// be opened, is empty, is too large for an int, cannot be allocated, or cannot
-// be read completely. If `size` is non-null it receives the byte count, or 0
-// on failure.
-static void* tsReadFileToMemory(const char* path, int* size)
-{
-    void* data = 0;
-    int sizeNum = 0;
-    FILE* fp = fopen(path, "rb");
-
-    if (fp)
-    {
-        long length = -1;
-
-        // ftell reports -1 on error; never hand that to malloc.
-        if (fseek(fp, 0, SEEK_END) == 0) length = ftell(fp);
-
-        if (length > 0 && length == (long)(int)length && fseek(fp, 0, SEEK_SET) == 0)
-        {
-            data = malloc((size_t)length);
-
-            if (data && fread(data, (size_t)length, 1, fp) == 1)
-            {
-                sizeNum = (int)length;
-            }
-            else
-            {
-                // Allocation failed or the read was short: report failure
-                // instead of returning a partially filled buffer.
-                free(data);
-                data = 0;
-            }
-        }
-
-        fclose(fp);
-    }
-
-    if (size) *size = sizeNum;
-    return data;
-}
-
-// Returns 1 when the first four bytes at `memory` equal the four-character
-// code `CC`, otherwise 0.
-static int tsFourCC(const char* CC, void* memory)
-{
-    return memcmp(CC, memory, 4) == 0 ? 1 : 0;
-}
-
-// Given a pointer to a RIFF chunk header (4-byte id, 4-byte size), returns a
-// pointer to the chunk that follows it. The payload size is rounded up to an
-// even number of bytes.
-static char* tsNext(char* data)
-{
-    uint32_t size;
-
-    // memcpy instead of *(uint32_t*) so an unaligned chunk header is read
-    // without violating alignment or aliasing rules.
-    memcpy(&size, data + 4, sizeof(size));
-    size = (size + 1) & ~(uint32_t)1;
-    return data + 8 + size;
-}
-
-// Allocates `size` bytes aligned to 16 bytes. Over-allocates by 16 bytes,
-// moves forward to the next 16-byte boundary (always at least one byte) and
-// stores the distance moved in the byte just before the returned pointer so
-// free16 can recover the original allocation. Returns 0 on failure.
-static void* malloc16(size_t size)
-{
-    // Guard the size + 16 computation against wrap-around.
-    if (size > (size_t)-1 - 16) return 0;
-
-    void* p = malloc(size + 16);
-    if (!p) return 0;
-    unsigned char offset = (size_t)p & 15;
-    // Explicit char* arithmetic: adding to a void* is not valid standard C.
-    p = (void*)TS_ALIGN((char*)p + 1, 16);
-    *((char*)p - 1) = 16 - offset;
-    TS_ASSERT(!((size_t)p & 15));
-    return p;
-}
-
-// Releases memory obtained from malloc16. The byte immediately before `p`
-// holds how far the pointer was moved forward from the original allocation.
-// Passing 0 is a no-op.
-static void free16(void* p)
-{
-    if (p)
-    {
-        char* aligned = (char*)p;
-        size_t shift = (size_t)aligned[-1];
-        free(aligned - shift);
-    }
-}
-
-// Writes the final (possibly partial) 4-wide vector a[i] from the contiguous
-// 16-bit samples starting at samples[j]. `offset` is sample_count & 3: the
-// number of valid samples in the last vector, where 0 means all four are
-// valid. Unused lanes are zero-filled.
-//
-// _mm_setr_ps is used so samples[j] lands in lane 0, the same lane order the
-// _mm_load_ps path produces for the preceding vectors (_mm_set_ps takes its
-// arguments from the highest lane down, which reversed the tail samples).
-// Any other `offset` value leaves a[i] untouched.
-static void tsLastElement(__m128* a, int i, int j, int16_t* samples, int offset)
-{
-    switch (offset)
-    {
-    case 1:
-        a[i] = _mm_setr_ps(samples[j], 0.0f, 0.0f, 0.0f);
-        break;
-
-    case 2:
-        a[i] = _mm_setr_ps(samples[j], samples[j + 1], 0.0f, 0.0f);
-        break;
-
-    case 3:
-        a[i] = _mm_setr_ps(samples[j], samples[j + 1], samples[j + 2], 0.0f);
-        break;
-
-    case 0:
-        a[i] = _mm_setr_ps(samples[j], samples[j + 1], samples[j + 2], samples[j + 3]);
-        break;
-    }
-}
-
-// Parses an in-memory 16-bit PCM WAV image (mono or stereo) into `sound`.
-//
-// Samples are converted to float and de-interleaved into one 16-byte aligned
-// array of __m128 per channel. Both channels share a single malloc16
-// allocation, with channels[1] following channels[0]. The last vector of each
-// channel is zero-padded when sample_count is not a multiple of four.
-//
-// On any error *sound is zeroed and g_tsErrorReason describes the failure.
-// The "data" chunk must directly follow the "fmt " chunk. The function is not
-// told how long `memory` is, so the buffer is trusted to hold the whole file.
-void tsReadMemWAV(const void* memory, tsLoadedSound* sound)
-{
-#pragma pack( push, 1 )
-    typedef struct
-    {
-        uint16_t wFormatTag;
-        uint16_t nChannels;
-        uint32_t nSamplesPerSec;
-        uint32_t nAvgBytesPerSec;
-        uint16_t nBlockAlign;
-        uint16_t wBitsPerSample;
-        uint16_t cbSize;
-        uint16_t wValidBitsPerSample;
-        uint32_t dwChannelMask;
-        uint8_t SubFormat[18];
-    } Fmt;
-#pragma pack( pop )
-
-    char* data = (char*)memory;
-    Fmt fmt;
-    uint32_t fmt_size = 0;
-    uint32_t data_size = 0;
-    int channel_count = 0;
-    int sample_count = 0;
-    int wide_count = 0;
-    int16_t* samples = 0;
-    __m128* a = 0;
-    __m128* b = 0;
-
-    TS_CHECK(data, "Unable to read input file (file doesn't exist, or could not allocate heap memory).");
-    TS_CHECK(tsFourCC("RIFF", data), "Incorrect file header; is this a WAV file?");
-    TS_CHECK(tsFourCC("WAVE", data + 8), "Incorrect file header; is this a WAV file?");
-
-    data += 12;
-
-    TS_CHECK(tsFourCC("fmt ", data), "fmt chunk not found.");
-    memcpy(&fmt_size, data + 4, sizeof(fmt_size));
-    TS_CHECK(fmt_size >= 16, "fmt chunk is too small.");
-
-    // Copy only as much of the chunk as exists; a plain PCM fmt chunk is 16
-    // bytes, far smaller than the extensible layout described by Fmt.
-    memset(&fmt, 0, sizeof(fmt));
-    memcpy(&fmt, data + 8, fmt_size < sizeof(fmt) ? fmt_size : sizeof(fmt));
-    TS_CHECK(fmt.wFormatTag == 1, "Only PCM WAV files are supported.");
-    TS_CHECK(fmt.nChannels == 1 || fmt.nChannels == 2, "Only mono or stereo supported (too many channels detected).");
-    TS_CHECK(fmt.wBitsPerSample == 16, "Only 16 bits per sample supported.");
-    TS_CHECK(fmt.nBlockAlign == fmt.nChannels * 2, "implementation error");
-
-    data = tsNext(data);
-    TS_CHECK(tsFourCC("data", data), "data chunk not found.");
-    memcpy(&data_size, data + 4, sizeof(data_size));
-
-    channel_count = fmt.nChannels;
-    sample_count = (int)(data_size / ((uint32_t)channel_count * sizeof(uint16_t)));
-    // Without this check an empty data chunk would index vector -1 below.
-    TS_CHECK(sample_count > 0, "WAV file contains no samples.");
-
-    wide_count = sample_count / 4 + ((sample_count & 3) ? 1 : 0);
-    samples = (int16_t*)(data + 8);
-
-    a = (__m128*)malloc16((size_t)wide_count * sizeof(__m128) * (size_t)channel_count);
-    TS_CHECK(a, "Unable to allocate memory for sound samples.");
-    if (channel_count == 2) b = a + wide_count;
-
-    for (int i = 0; i < wide_count; ++i)
-    {
-        float left[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
-        float right[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
-
-        for (int k = 0; k < 4; ++k)
-        {
-            size_t s = (size_t)i * 4 + (size_t)k;
-            if (s >= (size_t)sample_count) break; // zero-pad the tail
-
-            // Stereo data is interleaved L R L R ...; mono is contiguous.
-            left[k] = (float)samples[s * (size_t)channel_count];
-            if (b) right[k] = (float)samples[s * (size_t)channel_count + 1];
-        }
-
-        a[i] = _mm_setr_ps(left[0], left[1], left[2], left[3]);
-        if (b) b[i] = _mm_setr_ps(right[0], right[1], right[2], right[3]);
-    }
-
-    sound->sample_count = sample_count;
-    sound->channel_count = channel_count;
-    sound->channels[0] = a;
-    sound->channels[1] = b;
-    return;
-
-ts_err:
-    // Zero the caller's struct (not the local pointer variable) so callers
-    // can detect failure through a null channels[0].
-    memset(sound, 0, sizeof(tsLoadedSound));
-}
-
-// Loads a WAV file from disk: reads the whole file into a temporary buffer,
-// parses it with tsReadMemWAV, then frees the temporary buffer. The result
-// starts zero-initialized and is returned by value.
-tsLoadedSound tsLoadWAV(const char* path)
-{
-    tsLoadedSound result = { 0 };
-    void* file_bytes = tsReadFileToMemory(path, 0);
-
-    tsReadMemWAV(file_bytes, &result);
-    free(file_bytes);
-
-    return result;
-}
-
-// If stb_vorbis was included *before* tinysound go ahead and create
-// some functions for dealing with OGG files.
-#ifdef STB_VORBIS_INCLUDE_STB_VORBIS_H
-// Decodes an in-memory OGG Vorbis image with stb_vorbis_decode_memory and
-// stores the result in `sound`; the decoded rate is written to *sample_rate.
-//
-// Samples are converted to float and de-interleaved into one 16-byte aligned
-// array of __m128 per channel. Both channels share a single malloc16
-// allocation, and the last vector of each channel is zero-padded when
-// sample_count is not a multiple of four.
-//
-// On any error *sound is zeroed and g_tsErrorReason describes the failure.
-void tsReadMemOGG(const void* memory, int length, int* sample_rate, tsLoadedSound* sound)
-{
-    int16_t* samples = 0;
-    int channel_count = 0;
-    int sample_count = 0;
-    int wide_count = 0;
-    __m128* a = 0;
-    __m128* b = 0;
-
-    sample_count = stb_vorbis_decode_memory((const unsigned char*)memory, length, &channel_count, sample_rate, &samples);
-
-    TS_CHECK(sample_count > 0, "stb_vorbis_decode_memory failed. Make sure your file exists and is a valid OGG file.");
-    TS_CHECK(channel_count == 1 || channel_count == 2, "Unsupported channel count.");
-
-    wide_count = sample_count / 4 + ((sample_count & 3) ? 1 : 0);
-
-    a = (__m128*)malloc16((size_t)wide_count * sizeof(__m128) * (size_t)channel_count);
-    TS_CHECK(a, "Unable to allocate memory for sound samples.");
-    if (channel_count == 2) b = a + wide_count;
-
-    for (int i = 0; i < wide_count; ++i)
-    {
-        float left[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
-        float right[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
-
-        for (int k = 0; k < 4; ++k)
-        {
-            size_t s = (size_t)i * 4 + (size_t)k;
-            if (s >= (size_t)sample_count) break; // zero-pad the tail
-
-            // Decoded stereo data is interleaved L R L R ...; mono is contiguous.
-            left[k] = (float)samples[s * (size_t)channel_count];
-            if (b) right[k] = (float)samples[s * (size_t)channel_count + 1];
-        }
-
-        a[i] = _mm_setr_ps(left[0], left[1], left[2], left[3]);
-        if (b) b[i] = _mm_setr_ps(right[0], right[1], right[2], right[3]);
-    }
-
-    sound->sample_count = sample_count;
-    sound->channel_count = channel_count;
-    sound->channels[0] = a;
-    sound->channels[1] = b;
-    free(samples);
-    return;
-
-ts_err:
-    free(samples);
-    memset(sound, 0, sizeof(tsLoadedSound));
-}
-
-// Loads an OGG file from disk: reads the whole file into a temporary buffer,
-// decodes it with tsReadMemOGG (which fills or zeroes the result), then frees
-// the temporary buffer.
-tsLoadedSound tsLoadOGG(const char* path, int* sample_rate)
-{
-    tsLoadedSound result;
-    int byte_count;
-    void* file_bytes = tsReadFileToMemory(path, &byte_count);
-
-    tsReadMemOGG(file_bytes, byte_count, sample_rate, &result);
-    free(file_bytes);
-    return result;
-}
-#endif
-
-// Releases the sample memory of a loaded sound and zeroes the struct.
-// Only channels[0] is passed to free16: the loaders in this file place the
-// second channel inside the same allocation as the first.
-void tsFreeSound(tsLoadedSound* sound)
-{
-    void* base = sound->channels[0];
-
-    free16(base);
-    memset(sound, 0, sizeof(*sound));
-}
-
-// Returns the number of bytes of heap sample storage held by `sound`.
-// The loaders store each channel as float samples packed four per __m128 and
-// padded up to a whole vector, so the size is counted in vectors rather than
-// in 16-bit source samples. The up-to-16 bytes of alignment slack added by
-// malloc16 are not included.
-int tsSoundSize(tsLoadedSound* sound)
-{
-    int wide_count = sound->sample_count / 4 + ((sound->sample_count & 3) ? 1 : 0);
-    return (int)((size_t)wide_count * (size_t)sound->channel_count * sizeof(__m128));
-}
-
-// Builds a playing-sound instance for `loaded` with default settings:
-// inactive, not paused, not looped, full volume on both sides, centered pan,
-// unmodified pitch, no pitch filters, playback position 0 and no list link.
-tsPlayingSound tsMakePlayingSound(tsLoadedSound* loaded)
-{
-    tsPlayingSound instance;
-
-    // source data and list linkage
-    instance.loaded_sound = loaded;
-    instance.next = 0;
-    instance.sample_index = 0;
-
-    // state flags
-    instance.active = 0;
-    instance.paused = 0;
-    instance.looped = 0;
-
-    // mix parameters
-    instance.volume0 = 1.0f;
-    instance.volume1 = 1.0f;
-    instance.pan0 = 0.5f;
-    instance.pan1 = 0.5f;
-
-    // pitch shifting is off until tsSetPitch is used
-    instance.pitch = 1.0f;
-    instance.pitch_filter[0] = 0;
-    instance.pitch_filter[1] = 0;
-
-    return instance;
-}
-
-// Returns the sound's `active` flag.
-int tsIsActive(tsPlayingSound* sound)
-{
-    int is_active = sound->active;
-    return is_active;
-}
-
-// Clears the sound's `active` flag. This function does nothing else: it
-// neither unlinks the sound nor frees anything.
-void tsStopSound(tsPlayingSound* sound)
-{
-    (*sound).active = 0;
-}
-
-// Stores the loop flag on the sound; zero disables looping.
-void tsLoopSound(tsPlayingSound* sound, int zero_for_no_loop)
-{
-    (*sound).looped = zero_for_no_loop;
-}
-
-// Stores the pause flag on the sound; one means paused.
-void tsPauseSound(tsPlayingSound* sound, int one_for_paused)
-{
-    (*sound).paused = one_for_paused;
-}
-
-// Sets stereo panning. `pan` is clamped to [0, 1]; the right weight becomes
-// pan and the left weight becomes 1 - pan (0 = full left, 1 = full right).
-void tsSetPan(tsPlayingSound* sound, float pan)
-{
-    float clamped = pan;
-
-    if (clamped > 1.0f) clamped = 1.0f;
-    else if (clamped < 0.0f) clamped = 0.0f;
-
-    sound->pan1 = clamped;
-    sound->pan0 = 1.0f - clamped;
-}
-
-// Records the requested pitch factor on the sound. No validation or
-// processing happens here; the value is only stored.
-void tsSetPitch(tsPlayingSound* sound, float pitch)
-{
-    (*sound).pitch = pitch;
-}
-
-// Sets the left and right volume. Negative values are clamped to 0; there is
-// no upper bound.
-void tsSetVolume(tsPlayingSound* sound, float volume_left, float volume_right)
-{
-    sound->volume0 = (volume_left < 0.0f) ? 0.0f : volume_left;
-    sound->volume1 = (volume_right < 0.0f) ? 0.0f : volume_right;
-}
-
-static void tsRemoveFilter(tsPlayingSound* playing);
-
-#if TS_PLATFORM == TS_WINDOWS
-
-// Windows implementation of the portable sleep: forwards the duration, in
-// milliseconds, to Win32 Sleep.
-void tsSleep(int milliseconds)
-{
-    int duration_ms = milliseconds;
-    Sleep(duration_ms);
-}
-
-// Windows (DirectSound) context. tsMakeContext allocates it in one malloc
-// together with the mix buffers and the optional playing-sound pool.
-struct tsContext
-{
- unsigned latency_samples; // play frequency / latency factor, rounded up to a multiple of 4
- unsigned running_index; // starts at 0; presumably the running write position -- confirm in tsMix
- int Hz; // play frequency passed to tsMakeContext
- int bps; // bytes per stereo frame: sizeof(INT16) * 2
- int buffer_size; // secondary buffer size in bytes: Hz * bps * buffered seconds
- int wide_count; // Hz * buffered seconds, rounded up to a multiple of 4
- tsPlayingSound* playing; // head of the singly-linked list of playing sounds
- __m128* floatA; // 16-byte aligned mix buffer located right after this struct
- __m128* floatB; // second mix buffer, wide_count vectors after floatA
- __m128i* samples; // integer sample buffer, wide_count vectors after floatB
- tsPlayingSound* playing_pool; // pool storage after `samples`; 0 when the pool count is 0
- tsPlayingSound* playing_free; // head of the pool's free list; 0 when the pool count is 0
-
- // platform specific stuff
- LPDIRECTSOUND dsound;
- LPDIRECTSOUNDBUFFER buffer; // secondary buffer created with buffer_size bytes
- LPDIRECTSOUNDBUFFER primary; // primary buffer on which the output format is set
-
- // data for tsMix thread, enable these with tsSpawnMixThread
- CRITICAL_SECTION critical_section; // only initialized by tsSpawnMixThread
- int separate_thread; // 1 while the mix thread exists; gates tsLock / tsUnlock
- int running; // the mix thread loops while this is non-zero
- int sleep_milliseconds; // 0 means yield instead of sleeping between tsMix calls
-};
-
-// Tears down a Windows context: deletes the critical section if the mix
-// thread was enabled, releases the DirectSound objects, removes the pitch
-// filter from every sound still in the playing list, and frees the context.
-// NOTE(review): nothing here waits for the mix thread to exit; presumably the
-// caller ensures that before calling -- confirm in tsShutdownContext.
-static void tsReleaseContext(tsContext* ctx)
-{
- if (ctx->separate_thread) DeleteCriticalSection(&ctx->critical_section);
- // COM methods are called directly in C++ and through lpVtbl in C.
-#ifdef __cplusplus
- ctx->buffer->Release();
- ctx->primary->Release();
- ctx->dsound->Release();
-#else
- ctx->buffer->lpVtbl->Release(ctx->buffer);
- ctx->primary->lpVtbl->Release(ctx->primary);
- ctx->dsound->lpVtbl->Release(ctx->dsound);
-#endif
- tsPlayingSound* playing = ctx->playing;
- while (playing)
- {
- tsRemoveFilter(playing);
- playing = playing->next;
- }
- // The mix buffers and pool live inside the same allocation as ctx.
- free(ctx);
-}
-
-// Entry point of the Windows mix thread. Calls tsMix repeatedly while
-// ctx->running is non-zero. After each mix it sleeps for
-// ctx->sleep_milliseconds, or yields the processor when that is 0. It clears
-// ctx->separate_thread on the way out.
-static DWORD WINAPI tsCtxThread(LPVOID lpParameter)
-{
-    tsContext* ctx = (tsContext*)lpParameter;
-
-    for (;;)
-    {
-        if (!ctx->running) break;
-
-        tsMix(ctx);
-
-        if (ctx->sleep_milliseconds)
-        {
-            tsSleep(ctx->sleep_milliseconds);
-        }
-        else
-        {
-            YieldProcessor();
-        }
-    }
-
-    ctx->separate_thread = 0;
-    return 0;
-}
-
-// Enters the context's critical section, but only when the mix thread is in
-// use; otherwise this is a no-op.
-static void tsLock(tsContext* ctx)
-{
-    if (!ctx->separate_thread) return;
-    EnterCriticalSection(&ctx->critical_section);
-}
-
-// Leaves the context's critical section, but only when the mix thread is in
-// use; otherwise this is a no-op.
-static void tsUnlock(tsContext* ctx)
-{
-    if (!ctx->separate_thread) return;
-    LeaveCriticalSection(&ctx->critical_section);
-}
-
-// Creates the Windows (DirectSound) context.
-//
-// hwnd                 -- window handle; must be non-null
-// play_frequency_in_Hz -- output sample rate
-// latency_factor_in_Hz -- divisor for the play frequency when computing
-//                         latency_samples; must be positive
-// num_buffered_seconds -- length of the secondary buffer and mix buffers
-// playing_pool_count   -- number of pooled tsPlayingSound slots; 0 selects the
-//                         low-level API; must not be negative
-//
-// The context, both float mix buffers, the integer sample buffer and the pool
-// live in a single malloc'd block. Returns 0 on failure with g_tsErrorReason
-// set; any DirectSound objects created before the failure are released.
-tsContext* tsMakeContext(void* hwnd, unsigned play_frequency_in_Hz, int latency_factor_in_Hz, int num_buffered_seconds, int playing_pool_count)
-{
-    int bps = sizeof(INT16) * 2;
-    int buffer_size = play_frequency_in_Hz * bps * num_buffered_seconds;
-    tsContext* ctx = 0;
-    WAVEFORMATEX format = { 0 };
-    DSBUFFERDESC bufdesc = { 0 };
-    LPDIRECTSOUND dsound = 0;
-    LPDIRECTSOUNDBUFFER primary_buffer = 0;
-    LPDIRECTSOUNDBUFFER secondary_buffer = 0;
-    HRESULT res = DS_OK;
-    int sample_count = 0;
-    int wide_count = 0;
-    int pool_size = 0;
-    int mix_buffers_size = 0;
-    int sample_buffer_size = 0;
-
-    TS_CHECK(hwnd, "Invalid hwnd passed to tsMakeContext.");
-    // latency_factor_in_Hz is used as a divisor below.
-    TS_CHECK(latency_factor_in_Hz > 0, "Invalid latency factor passed to tsMakeContext.");
-    // A negative count would index before the start of the pool.
-    TS_CHECK(playing_pool_count >= 0, "Invalid playing pool count passed to tsMakeContext.");
-
-    res = DirectSoundCreate(0, &dsound, 0);
-    TS_CHECK(res == DS_OK, "DirectSoundCreate failed");
-#ifdef __cplusplus
-    dsound->SetCooperativeLevel((HWND)hwnd, DSSCL_PRIORITY);
-#else
-    dsound->lpVtbl->SetCooperativeLevel(dsound, (HWND)hwnd, DSSCL_PRIORITY);
-#endif
-    bufdesc.dwSize = sizeof(bufdesc);
-    bufdesc.dwFlags = DSBCAPS_PRIMARYBUFFER;
-
-#ifdef __cplusplus
-    res = dsound->CreateSoundBuffer(&bufdesc, &primary_buffer, 0);
-#else
-    res = dsound->lpVtbl->CreateSoundBuffer(dsound, &bufdesc, &primary_buffer, 0);
-#endif
-    TS_CHECK(res == DS_OK, "Failed to create primary sound buffer");
-
-    // 16-bit stereo PCM at the requested rate.
-    format.wFormatTag = WAVE_FORMAT_PCM;
-    format.nChannels = 2;
-    format.nSamplesPerSec = play_frequency_in_Hz;
-    format.wBitsPerSample = 16;
-    format.nBlockAlign = (format.nChannels * format.wBitsPerSample) / 8;
-    format.nAvgBytesPerSec = format.nSamplesPerSec * format.nBlockAlign;
-    format.cbSize = 0;
-#ifdef __cplusplus
-    res = primary_buffer->SetFormat(&format);
-#else
-    res = primary_buffer->lpVtbl->SetFormat(primary_buffer, &format);
-#endif
-    TS_CHECK(res == DS_OK, "Failed to set format on primary buffer");
-
-    bufdesc.dwSize = sizeof(bufdesc);
-    bufdesc.dwFlags = 0;
-    bufdesc.dwBufferBytes = buffer_size;
-    bufdesc.lpwfxFormat = &format;
-#ifdef __cplusplus
-    res = dsound->CreateSoundBuffer(&bufdesc, &secondary_buffer, 0);
-#else
-    res = dsound->lpVtbl->CreateSoundBuffer(dsound, &bufdesc, &secondary_buffer, 0);
-#endif
-    TS_CHECK(res == DS_OK, "Failed to create secondary sound buffer");
-
-    sample_count = play_frequency_in_Hz * num_buffered_seconds;
-    wide_count = (int)TS_ALIGN(sample_count, 4);
-    pool_size = playing_pool_count * sizeof(tsPlayingSound);
-    mix_buffers_size = sizeof(__m128) * wide_count * 2;
-    sample_buffer_size = sizeof(__m128i) * wide_count;
-    // The extra 16 bytes leave room to align floatA to 16 bytes.
-    ctx = (tsContext*)malloc(sizeof(tsContext) + mix_buffers_size + sample_buffer_size + 16 + pool_size);
-    TS_CHECK(ctx, "Unable to allocate memory for tsContext.");
-
-    ctx->latency_samples = (unsigned)TS_ALIGN(play_frequency_in_Hz / latency_factor_in_Hz, 4);
-    ctx->running_index = 0;
-    ctx->Hz = play_frequency_in_Hz;
-    ctx->bps = bps;
-    ctx->buffer_size = buffer_size;
-    ctx->wide_count = wide_count;
-    ctx->dsound = dsound;
-    ctx->buffer = secondary_buffer;
-    ctx->primary = primary_buffer;
-    ctx->playing = 0;
-    ctx->floatA = (__m128*)(ctx + 1);
-    ctx->floatA = (__m128*)TS_ALIGN(ctx->floatA, 16);
-    TS_ASSERT(!((size_t)ctx->floatA & 15));
-    ctx->floatB = ctx->floatA + wide_count;
-    ctx->samples = (__m128i*)ctx->floatB + wide_count;
-    ctx->running = 1;
-    ctx->separate_thread = 0;
-    ctx->sleep_milliseconds = 0;
-
-    if (playing_pool_count)
-    {
-        // Thread every pool slot onto the free list.
-        ctx->playing_pool = (tsPlayingSound*)(ctx->samples + wide_count);
-        for (int i = 0; i < playing_pool_count - 1; ++i)
-            ctx->playing_pool[i].next = ctx->playing_pool + i + 1;
-        ctx->playing_pool[playing_pool_count - 1].next = 0;
-        ctx->playing_free = ctx->playing_pool;
-    }
-
-    else
-    {
-        ctx->playing_pool = 0;
-        ctx->playing_free = 0;
-    }
-
-    return ctx;
-
-ts_err:
-    // Release whatever was created before the failure, newest first.
-#ifdef __cplusplus
-    if (secondary_buffer) secondary_buffer->Release();
-    if (primary_buffer) primary_buffer->Release();
-    if (dsound) dsound->Release();
-#else
-    if (secondary_buffer) secondary_buffer->lpVtbl->Release(secondary_buffer);
-    if (primary_buffer) primary_buffer->lpVtbl->Release(primary_buffer);
-    if (dsound) dsound->lpVtbl->Release(dsound);
-#endif
-    free(ctx);
-    return 0;
-}
-
-// Starts the background thread that repeatedly calls tsMix for this context.
-// Does nothing if the thread was already spawned. The thread handle is closed
-// right away because nothing in the context keeps it; closing the handle does
-// not stop the thread. If the thread cannot be created, the context is put
-// back into single-threaded mode.
-void tsSpawnMixThread(tsContext* ctx)
-{
-    HANDLE thread;
-
-    if (ctx->separate_thread) return;
-    InitializeCriticalSectionAndSpinCount(&ctx->critical_section, 0x00000400);
-    ctx->separate_thread = 1;
-    thread = CreateThread(0, 0, tsCtxThread, ctx, 0, 0);
-
-    if (thread)
-    {
-        CloseHandle(thread);
-    }
-    else
-    {
-        // No thread is running: undo the setup so tsLock / tsUnlock stay no-ops.
-        ctx->separate_thread = 0;
-        DeleteCriticalSection(&ctx->critical_section);
-    }
-}
-
-#elif TS_PLATFORM == TS_MAC
-
-// Mac implementation of the portable sleep: converts milliseconds to
-// microseconds and calls usleep.
-void tsSleep(int milliseconds)
-{
-    int microseconds = milliseconds * 1000;
-    usleep(microseconds);
-}
-
-// Mac (CoreAudio) context.
-struct tsContext
-{
- unsigned latency_samples;
- unsigned index0; // read
- unsigned index1; // write
- int Hz;
- int bps;
- int wide_count;
- int sample_count;
- tsPlayingSound* playing; // head of the singly-linked list of playing sounds
- __m128* floatA;
- __m128* floatB;
- __m128i* samples;
- tsPlayingSound* playing_pool;
- tsPlayingSound* playing_free;
-
- // platform specific stuff
- AudioComponentInstance inst; // audio unit that tsReleaseContext stops, uninitializes and disposes
-
- // data for tsMix thread, enable these with tsSpawnMixThread
- pthread_t thread;
- pthread_mutex_t mutex; // locked by tsLock / tsUnlock only when separate_thread is set
- int separate_thread; // cleared by the mix thread when it exits
- int running; // the mix thread loops while this is non-zero
- int sleep_milliseconds; // 0 means yield instead of sleeping between tsMix calls
-};
-
-// Tears down the CoreAudio output unit, frees the pitch filters of every
-// sound still linked into the playing list, then frees the context itself.
-static void tsReleaseContext(tsContext* ctx)
-{
-    if (ctx->separate_thread)
-    {
-        pthread_mutex_destroy(&ctx->mutex);
-    }
-
-    AudioOutputUnitStop(ctx->inst);
-    AudioUnitUninitialize(ctx->inst);
-    AudioComponentInstanceDispose(ctx->inst);
-
-    for (tsPlayingSound* sound = ctx->playing; sound; sound = sound->next)
-    {
-        tsRemoveFilter(sound);
-    }
-
-    free(ctx);
-}
-
-// Entry point of the mix thread started by tsSpawnMixThread: calls tsMix
-// until ctx->running is cleared, pausing (or yielding) between iterations,
-// then clears separate_thread to signal that the thread is gone.
-static void* tsCtxThread(void* udata)
-{
-    tsContext* ctx = (tsContext*)udata;
-
-    for (;;)
-    {
-        if (!ctx->running) break;
-
-        tsMix(ctx);
-
-        if (ctx->sleep_milliseconds == 0) pthread_yield_np();
-        else tsSleep(ctx->sleep_milliseconds);
-    }
-
-    ctx->separate_thread = 0;
-    pthread_exit(0);
-    return 0;
-}
-
-// Takes the context mutex, but only when a mix thread is in use.
-static void tsLock(tsContext* ctx)
-{
-    if (!ctx->separate_thread) return;
-    pthread_mutex_lock(&ctx->mutex);
-}
-
-// Releases the context mutex, but only when a mix thread is in use.
-static void tsUnlock(tsContext* ctx)
-{
-    if (!ctx->separate_thread) return;
-    pthread_mutex_unlock(&ctx->mutex);
-}
-
-static OSStatus tsMemcpyToCA(void* udata, AudioUnitRenderActionFlags* ioActionFlags, const AudioTimeStamp* inTimeStamp, UInt32 inBusNumber, UInt32 inNumberFrames, AudioBufferList* ioData);
-
-// Creates a mixer context that outputs through a CoreAudio default output unit.
-//
-// unused                 - ignored on this platform
-// play_frequency_in_Hz   - output sample rate
-// latency_factor_in_Hz   - play_frequency_in_Hz / latency_factor_in_Hz (rounded
-//                          up to a multiple of 4) samples are kept queued
-// num_buffered_seconds   - size of the internal buffers, in seconds of audio
-// playing_pool_count     - number of pooled tsPlayingSound instances, or 0 to
-//                          disable the pool
-//
-// Returns the new context, or 0 on failure (g_tsErrorReason is set for the
-// component lookup failure; the other failures go through TS_CHECK). On
-// failure the output unit created here is disposed and no memory is leaked.
-tsContext* tsMakeContext(void* unused, unsigned play_frequency_in_Hz, int latency_factor_in_Hz, int num_buffered_seconds, int playing_pool_count)
-{
-    (void)unused;
-    int bps = sizeof(uint16_t) * 2;
-    tsContext* ctx = 0;
-    int have_inst = 0;
-    int initialized = 0;
-
-    AudioComponentDescription comp_desc = { 0 };
-    comp_desc.componentType = kAudioUnitType_Output;
-    comp_desc.componentSubType = kAudioUnitSubType_DefaultOutput;
-    comp_desc.componentFlags = 0;
-    comp_desc.componentFlagsMask = 0;
-    comp_desc.componentManufacturer = kAudioUnitManufacturer_Apple;
-
-    AudioComponent comp = AudioComponentFindNext(NULL, &comp_desc);
-    if (!comp)
-    {
-        g_tsErrorReason = "Failed to create output unit from AudioComponentFindNext.";
-        return 0;
-    }
-
-    AudioStreamBasicDescription stream_desc = { 0 };
-    stream_desc.mSampleRate = (double)play_frequency_in_Hz;
-    stream_desc.mFormatID = kAudioFormatLinearPCM;
-    stream_desc.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagsNativeEndian | kAudioFormatFlagIsPacked;
-    stream_desc.mFramesPerPacket = 1;
-    stream_desc.mChannelsPerFrame = 2;
-    stream_desc.mBitsPerChannel = sizeof(uint16_t) * 8;
-    stream_desc.mBytesPerPacket = bps;
-    stream_desc.mBytesPerFrame = bps;
-    stream_desc.mReserved = 0;
-
-    AudioComponentInstance inst;
-    OSStatus ret;
-    AURenderCallbackStruct input;
-
-    // All sizes are computed before the first TS_CHECK so that no jump to
-    // ts_err crosses an initialized declaration.
-    int sample_count = play_frequency_in_Hz * num_buffered_seconds;
-    int latency_count = (unsigned)TS_ALIGN(play_frequency_in_Hz / latency_factor_in_Hz, 4);
-    TS_ASSERT(sample_count > latency_count);
-    int wide_count = (int)TS_ALIGN(sample_count, 4) / 4;
-    int pool_size = playing_pool_count * sizeof(tsPlayingSound);
-    int mix_buffers_size = sizeof(__m128) * wide_count * 2;
-    int sample_buffer_size = sizeof(__m128i) * wide_count;
-
-    ret = AudioComponentInstanceNew(comp, &inst);
-    TS_CHECK(ret == noErr, "AudioComponentInstanceNew failed");
-    have_inst = 1;
-
-    // One allocation holds the context, both float mix buffers, the sample
-    // ring buffer (+16 bytes of slack for alignment) and the optional pool.
-    ctx = (tsContext*)malloc(sizeof(tsContext) + mix_buffers_size + sample_buffer_size + 16 + pool_size);
-    TS_CHECK(ctx != NULL, "Can't create audio context");
-    ctx->latency_samples = latency_count;
-    ctx->index0 = 0;
-    ctx->index1 = 0;
-    ctx->Hz = play_frequency_in_Hz;
-    ctx->bps = bps;
-    ctx->wide_count = wide_count;
-    ctx->sample_count = wide_count * 4;
-    ctx->inst = inst;
-    ctx->playing = 0;
-    ctx->floatA = (__m128*)(ctx + 1);
-    ctx->floatA = (__m128*)TS_ALIGN(ctx->floatA, 16);
-    TS_ASSERT(!((size_t)ctx->floatA & 15));
-    ctx->floatB = ctx->floatA + wide_count;
-    ctx->samples = (__m128i*)ctx->floatB + wide_count;
-    ctx->running = 1;
-    ctx->separate_thread = 0;
-    ctx->sleep_milliseconds = 0;
-
-    // Build the free list before audio starts so the context is complete by
-    // the time the render callback can run.
-    if (playing_pool_count)
-    {
-        ctx->playing_pool = (tsPlayingSound*)(ctx->samples + wide_count);
-        for (int i = 0; i < playing_pool_count - 1; ++i)
-            ctx->playing_pool[i].next = ctx->playing_pool + i + 1;
-        ctx->playing_pool[playing_pool_count - 1].next = 0;
-        ctx->playing_free = ctx->playing_pool;
-    }
-
-    else
-    {
-        ctx->playing_pool = 0;
-        ctx->playing_free = 0;
-    }
-
-    ret = AudioUnitSetProperty(inst, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Input, 0, &stream_desc, sizeof(stream_desc));
-    TS_CHECK(ret == noErr, "Failed to set stream forat");
-
-    input.inputProc = tsMemcpyToCA;
-    input.inputProcRefCon = ctx;
-    ret = AudioUnitSetProperty(inst, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Input, 0, &input, sizeof(input));
-    TS_CHECK(ret == noErr, "AudioUnitSetProperty failed");
-
-    ret = AudioUnitInitialize(inst);
-    TS_CHECK(ret == noErr, "Couldn't initialize output unit");
-    initialized = 1;
-
-    ret = AudioOutputUnitStart(inst);
-    TS_CHECK(ret == noErr, "Couldn't start output unit");
-
-    return ctx;
-
-ts_err:
-    // Undo whatever was set up, in reverse order.
-    if (have_inst)
-    {
-        if (initialized) AudioUnitUninitialize(inst);
-        AudioComponentInstanceDispose(inst);
-    }
-    free(ctx);
-    return 0;
-}
-
-// Starts a background thread that runs tsCtxThread on this context.
-// Does nothing if the context already has a mix thread. If the mutex or the
-// thread cannot be created the context stays in single-threaded mode;
-// otherwise tsShutdownContext would wait forever for separate_thread to be
-// cleared by a thread that never ran.
-void tsSpawnMixThread(tsContext* ctx)
-{
-    if (ctx->separate_thread) return;
-    if (pthread_mutex_init(&ctx->mutex, 0) != 0) return;
-
-    ctx->separate_thread = 1;
-    if (pthread_create(&ctx->thread, 0, tsCtxThread, ctx) != 0)
-    {
-        ctx->separate_thread = 0;
-        pthread_mutex_destroy(&ctx->mutex);
-        return;
-    }
-
-    // Shutdown polls separate_thread instead of joining, so detach the
-    // thread to let its resources be reclaimed when it exits.
-    pthread_detach(ctx->thread);
-}
-
-#else
-
-// Suspends the calling thread for about `milliseconds` ms via SDL_Delay.
-void tsSleep(int milliseconds)
-{
-    Uint32 delay_ms = (Uint32)milliseconds;
-    SDL_Delay(delay_ms);
-}
-
-// Mixer state for the SDL backend. The struct is allocated by tsMakeContext
-// in one block together with the float mix buffers, the 16-bit sample ring
-// buffer and the optional playing-sound pool.
-struct tsContext
-{
- // number of samples the mixer tries to keep queued (see tsSamplesToMix)
- unsigned latency_samples;
- unsigned index0; // read
- unsigned index1; // write
- // NOTE(review): not assigned by the SDL tsMakeContext below - confirm it is unused here
- unsigned running_index;
- // output sample rate passed to tsMakeContext
- int Hz;
- // bytes per interleaved stereo sample: sizeof(uint16_t) * 2
- int bps;
- // NOTE(review): not assigned by the SDL tsMakeContext below - confirm it is unused here
- int buffer_size;
- // number of __m128 elements in each of floatA / floatB / samples
- int wide_count;
- // wide_count * 4; capacity of the ring buffer in samples
- int sample_count;
- // head of the singly linked list of sounds currently being mixed
- tsPlayingSound* playing;
- // float mix accumulators; tsMix scales samples by volume0*pan0 into
- // floatA and by volume1*pan1 into floatB
- __m128* floatA;
- __m128* floatB;
- // ring buffer of interleaved 16-bit output, filled by tsPushBytes and
- // drained by tsPullBytes
- __m128i* samples;
- // storage for pooled tsPlayingSound instances (0 when no pool was requested)
- tsPlayingSound* playing_pool;
- // free list threaded through playing_pool
- tsPlayingSound* playing_free;
-
- // data for tsMix thread, enable these with tsSpawnMixThread
- SDL_Thread* thread;
- SDL_mutex* mutex;
- // non-zero while a mix thread exists; cleared by tsCtxThread on exit
- int separate_thread;
- // set to 0 by tsShutdownContext to ask the mix thread to stop
- int running;
- // delay between tsMix calls on the mix thread; 0 means sleep 1 ms
- int sleep_milliseconds;
-};
-
-// Frees the pitch filters of every sound still linked into the playing list,
-// closes the SDL audio device and frees the context itself.
-static void tsReleaseContext(tsContext* ctx)
-{
-    if (ctx->separate_thread)
-    {
-        SDL_DestroyMutex(ctx->mutex);
-    }
-
-    for (tsPlayingSound* sound = ctx->playing; sound; sound = sound->next)
-    {
-        tsRemoveFilter(sound);
-    }
-
-    SDL_CloseAudio();
-    free(ctx);
-}
-
-// Entry point of the mix thread started by tsSpawnMixThread: calls tsMix
-// until ctx->running is cleared, sleeping sleep_milliseconds (or 1 ms when
-// that is 0) between iterations, then clears separate_thread.
-int tsCtxThread(void* udata)
-{
-    tsContext* ctx = (tsContext*)udata;
-
-    for (;;)
-    {
-        if (!ctx->running) break;
-
-        tsMix(ctx);
-        tsSleep(ctx->sleep_milliseconds ? ctx->sleep_milliseconds : 1);
-    }
-
-    ctx->separate_thread = 0;
-    return 0;
-}
-
-// Takes the context mutex, but only when a mix thread is in use.
-static void tsLock(tsContext* ctx)
-{
-    if (!ctx->separate_thread) return;
-    SDL_LockMutex(ctx->mutex);
-}
-
-// Releases the context mutex, but only when a mix thread is in use.
-static void tsUnlock(tsContext* ctx)
-{
-    if (!ctx->separate_thread) return;
-    SDL_UnlockMutex(ctx->mutex);
-}
-
-// Forward declaration of the SDL audio callback. It is declared static to
-// match its definition further down; a non-static declaration followed by a
-// static definition is a linkage conflict.
-static void tsSDL_AudioCallback(void* udata, Uint8* stream, int len);
-
-// Creates a mixer context that outputs through SDL audio.
-//
-// unused                 - ignored on this platform
-// play_frequency_in_Hz   - output sample rate
-// latency_factor_in_Hz   - play_frequency_in_Hz / latency_factor_in_Hz (rounded
-//                          up to a multiple of 4) samples are kept queued
-// num_buffered_seconds   - size of the internal buffers, in seconds of audio
-// playing_pool_count     - number of pooled tsPlayingSound instances, or 0 to
-//                          disable the pool
-//
-// Returns the new context, or 0 if SDL audio could not be initialised or
-// opened, or if the allocation failed.
-tsContext* tsMakeContext(void* unused, unsigned play_frequency_in_Hz, int latency_factor_in_Hz, int num_buffered_seconds, int playing_pool_count)
-{
-    (void)unused;
-    int bps = sizeof(uint16_t) * 2;
-    int sample_count = play_frequency_in_Hz * num_buffered_seconds;
-    int latency_count = (unsigned)TS_ALIGN(play_frequency_in_Hz / latency_factor_in_Hz, 4);
-    TS_ASSERT(sample_count > latency_count);
-    int wide_count = (int)TS_ALIGN(sample_count, 4) / 4;
-    int pool_size = playing_pool_count * sizeof(tsPlayingSound);
-    int mix_buffers_size = sizeof(__m128) * wide_count * 2;
-    int sample_buffer_size = sizeof(__m128i) * wide_count;
-    tsContext* ctx = 0;
-    SDL_AudioSpec wanted;
-    int ret = SDL_Init(SDL_INIT_AUDIO);
-    TS_CHECK(ret >= 0, "Can't init SDL audio");
-
-    // One allocation holds the context, both float mix buffers, the sample
-    // ring buffer (+16 bytes of slack for alignment) and the optional pool.
-    ctx = (tsContext*)malloc(sizeof(tsContext) + mix_buffers_size + sample_buffer_size + 16 + pool_size);
-    TS_CHECK(ctx != NULL, "Can't create audio context");
-    ctx->latency_samples = latency_count;
-    ctx->index0 = 0;
-    ctx->index1 = 0;
-    ctx->Hz = play_frequency_in_Hz;
-    ctx->bps = bps;
-    ctx->wide_count = wide_count;
-    ctx->sample_count = wide_count * 4;
-    ctx->playing = 0;
-    ctx->floatA = (__m128*)(ctx + 1);
-    ctx->floatA = (__m128*)TS_ALIGN(ctx->floatA, 16);
-    TS_ASSERT(!((size_t)ctx->floatA & 15));
-    ctx->floatB = ctx->floatA + wide_count;
-    ctx->samples = (__m128i*)ctx->floatB + wide_count;
-    ctx->running = 1;
-    ctx->separate_thread = 0;
-    ctx->sleep_milliseconds = 0;
-
-    SDL_memset(&wanted, 0, sizeof(wanted));
-    wanted.freq = play_frequency_in_Hz;
-    wanted.format = AUDIO_S16SYS;
-    wanted.channels = 2; /* 1 = mono, 2 = stereo */
-    wanted.samples = 1024;
-    wanted.callback = tsSDL_AudioCallback;
-    wanted.userdata = ctx;
-    ret = SDL_OpenAudio(&wanted, NULL);
-    TS_CHECK(ret >= 0, "Can't open SDL audio");
-    SDL_PauseAudio(0);
-
-    if (playing_pool_count)
-    {
-        // Thread the pool into a free list; the pool lives right after the
-        // sample ring buffer inside the same allocation.
-        ctx->playing_pool = (tsPlayingSound*)(ctx->samples + wide_count);
-        for (int i = 0; i < playing_pool_count - 1; ++i)
-            ctx->playing_pool[i].next = ctx->playing_pool + i + 1;
-        ctx->playing_pool[playing_pool_count - 1].next = 0;
-        ctx->playing_free = ctx->playing_pool;
-    }
-
-    else
-    {
-        ctx->playing_pool = 0;
-        ctx->playing_free = 0;
-    }
-
-    return ctx;
-
-ts_err:
-    free(ctx);
-    return 0;
-}
-
-// Starts a background thread that runs tsCtxThread on this context.
-// Does nothing if the context already has a mix thread. If the mutex or the
-// thread cannot be created the context stays in single-threaded mode;
-// otherwise tsShutdownContext would wait forever for separate_thread to be
-// cleared by a thread that never ran.
-void tsSpawnMixThread(tsContext* ctx)
-{
-    if (ctx->separate_thread) return;
-
-    ctx->mutex = SDL_CreateMutex();
-    if (!ctx->mutex) return;
-
-    ctx->separate_thread = 1;
-    ctx->thread = SDL_CreateThread(&tsCtxThread, "TinySoundThread", ctx);
-    if (!ctx->thread)
-    {
-        ctx->separate_thread = 0;
-        SDL_DestroyMutex(ctx->mutex);
-        ctx->mutex = 0;
-    }
-}
-
-#endif
-
-#if TS_PLATFORM == TS_SDL || TS_PLATFORM == TS_MAC
-
-// Number of samples currently queued in the ring buffer, i.e. the distance
-// from the read index (index0) forward to the write index (index1).
-static int tsSamplesWritten(tsContext* ctx)
-{
-    int read_pos = ctx->index0;
-    int write_pos = ctx->index1;
-    return read_pos <= write_pos
-        ? write_pos - read_pos
-        : ctx->sample_count - read_pos + write_pos;
-}
-
-// Number of free sample slots in the ring buffer, i.e. the distance from the
-// write index (index1) forward to the read index (index0).
-static int tsSamplesUnwritten(tsContext* ctx)
-{
-    int read_pos = ctx->index0;
-    int write_pos = ctx->index1;
-    return read_pos <= write_pos
-        ? ctx->sample_count - write_pos + read_pos
-        : read_pos - write_pos;
-}
-
-// How many samples tsMix should produce now: enough to bring the queued
-// amount up to latency_samples, but never more than the free space.
-static int tsSamplesToMix(tsContext* ctx)
-{
-    int target = ctx->latency_samples;
-    int queued = tsSamplesWritten(ctx);
-    int deficit = target - queued;
-    TS_ASSERT(deficit >= 0);
-
-    if (!deficit) return 0;
-
-    int room = tsSamplesUnwritten(ctx);
-    if (deficit < room) return deficit;
-    return room;
-}
-
-#define TS_SAMPLES_TO_BYTES( interleaved_sample_count ) ((interleaved_sample_count) * ctx->bps)
-#define TS_BYTES_TO_SAMPLES( byte_count ) ((byte_count) / ctx->bps)
-
-// Appends up to `size` bytes of interleaved 16-bit samples from `data` to the
-// ring buffer at the write index (index1), wrapping to the start of the
-// buffer when the end is reached.
-//
-// The amount written is clamped to the free space and rounded down to whole
-// samples. Previously only the sample count was clamped while the memcpy
-// calls still used the caller's byte count, so an oversized push could
-// overwrite unread data and advance the write index past the clamp.
-static void tsPushBytes(tsContext* ctx, void* data, int size)
-{
-    int index0 = ctx->index0;
-    int index1 = ctx->index1;
-    int samples = TS_BYTES_TO_SAMPLES(size);
-    int sample_count = ctx->sample_count;
-
-    int unwritten = tsSamplesUnwritten(ctx);
-    if (unwritten < samples) samples = unwritten;
-    size = TS_SAMPLES_TO_BYTES(samples);
-
-    // The write can only run off the end of the buffer when the read index
-    // is not ahead of the write index.
-    int can_overflow = index0 <= index1;
-    int would_overflow = index1 + samples > sample_count;
-
-    if (can_overflow && would_overflow)
-    {
-        int first_size = TS_SAMPLES_TO_BYTES(sample_count - index1);
-        int second_size = size - first_size;
-        memcpy((char*)ctx->samples + TS_SAMPLES_TO_BYTES(index1), data, first_size);
-        memcpy(ctx->samples, (char*)data + first_size, second_size);
-        ctx->index1 = TS_BYTES_TO_SAMPLES(second_size);
-    }
-
-    else
-    {
-        memcpy((char*)ctx->samples + TS_SAMPLES_TO_BYTES(index1), data, size);
-        ctx->index1 += samples;
-    }
-}
-
-// Copies up to `size` bytes of queued samples from the ring buffer into `dst`
-// and advances the read index (index0). Returns how many of the requested
-// bytes could NOT be supplied; the caller is expected to fill those itself.
-static int tsPullBytes(tsContext* ctx, void* dst, int size)
-{
-    int read_pos = ctx->index0;
-    int write_pos = ctx->index1;
-    int available = TS_SAMPLES_TO_BYTES(tsSamplesWritten(ctx));
-    int shortfall = 0;
-    char* out = (char*)dst;
-    char* ring = (char*)ctx->samples;
-
-    if (size > available)
-    {
-        shortfall = size - available;
-        size = available;
-    }
-
-    if (write_pos < read_pos)
-    {
-        // Queued data wraps: take the run up to the end of the buffer first,
-        // then whatever is still needed from the start.
-        int head = TS_SAMPLES_TO_BYTES(ctx->sample_count) - TS_SAMPLES_TO_BYTES(read_pos);
-        if (head > size) head = size;
-        int tail = size - head;
-        memcpy(out, ring + TS_SAMPLES_TO_BYTES(read_pos), head);
-        memcpy(out + head, ring, tail);
-        if (tail) ctx->index0 = TS_BYTES_TO_SAMPLES(tail);
-        else ctx->index0 += TS_BYTES_TO_SAMPLES(head);
-    }
-
-    else
-    {
-        memcpy(out, ring + TS_SAMPLES_TO_BYTES(read_pos), size);
-        ctx->index0 += TS_BYTES_TO_SAMPLES(size);
-    }
-
-    return shortfall;
-}
-
-#endif
-
-// Stops the mix thread (if one was spawned), waits for it to report that it
-// has exited, then releases the context and everything it owns.
-void tsShutdownContext(tsContext* ctx)
-{
-    if (ctx->separate_thread)
-    {
-        tsLock(ctx);
-        ctx->running = 0;
-        tsUnlock(ctx);
-    }
-
-    // tsCtxThread clears separate_thread just before it returns.
-    for (;;)
-    {
-        if (!ctx->separate_thread) break;
-        tsSleep(1);
-    }
-
-    tsReleaseContext(ctx);
-}
-
-// Sets how long the mix thread sleeps between tsMix calls, in milliseconds.
-// The thread reads this value on every iteration of its loop.
-void tsThreadSleepDelay(tsContext* ctx, int milliseconds)
-{
-    ctx->sleep_milliseconds = milliseconds;
-}
-
-// Low-level API: links a caller-owned tsPlayingSound at the head of the
-// playing list and marks it active. A sound that is already active is left
-// untouched.
-void tsInsertSound(tsContext* ctx, tsPlayingSound* sound)
-{
-    // This entry point is only valid when tsMakeContext was given a
-    // playing_pool_count of zero. A non-zero count means the context manages
-    // tsPlayingSound memory itself through the pool (the high-level API),
-    // and caller-owned sounds must not be mixed into that list.
-    TS_ASSERT(ctx->playing_pool == 0);
-
-    if (!sound->active)
-    {
-        tsLock(ctx);
-        sound->next = ctx->playing;
-        ctx->playing = sound;
-        sound->active = 1;
-        tsUnlock(ctx);
-    }
-}
-
-// Delays the start of `sound` by storing a negative sample_index: the delay
-// is converted to samples at the context rate and rounded with TS_ALIGN(.., 4).
-// NOTE: negative delays are not supported; they are clamped to 0.
-void tsSetDelay(tsContext* ctx, tsPlayingSound* sound, float delay_in_seconds)
-{
-    if (delay_in_seconds < 0.0f)
-    {
-        delay_in_seconds = 0.0f;
-    }
-
-    sound->sample_index = (int)(delay_in_seconds * (float)ctx->Hz);
-    sound->sample_index = -(int)TS_ALIGN(sound->sample_index, 4);
-}
-
-// Builds a tsPlaySoundDef for `sound` with default settings: not paused, not
-// looped, full volume on both channels, centred pan, pitch 1 and no delay.
-tsPlaySoundDef tsMakeDef(tsLoadedSound* sound)
-{
-    tsPlaySoundDef result;
-
-    result.loaded = sound;
-
-    // playback flags
-    result.paused = 0;
-    result.looped = 0;
-
-    // levels
-    result.volume_left = 1.0f;
-    result.volume_right = 1.0f;
-    result.pan = 0.5f;
-
-    // timing
-    result.pitch = 1.0f;
-    result.delay = 0.0f;
-
-    return result;
-}
-
-// High-level API: takes a tsPlayingSound from the context's pool, sets it up
-// from `def` and links it at the head of the playing list.
-//
-// Returns the playing instance, or 0 when the pool has no free entry (which
-// is also the case when the context was created without a pool). The lock is
-// released on every path; the original returned with it still held when the
-// pool was exhausted.
-tsPlayingSound* tsPlaySound(tsContext* ctx, tsPlaySoundDef def)
-{
-    tsLock(ctx);
-
-    tsPlayingSound* playing = ctx->playing_free;
-    if (!playing)
-    {
-        tsUnlock(ctx);
-        return 0;
-    }
-
-    ctx->playing_free = playing->next;
-    *playing = tsMakePlayingSound(def.loaded);
-    playing->active = 1;
-    playing->paused = def.paused;
-    playing->looped = def.looped;
-    tsSetVolume(playing, def.volume_left, def.volume_right);
-    tsSetPan(playing, def.pan);
-    tsSetPitch(playing, def.pitch);
-    tsSetDelay(ctx, playing, def.delay);
-    playing->next = ctx->playing;
-    ctx->playing = playing;
-
-    tsUnlock(ctx);
-
-    return playing;
-}
-
-// High-level API: stops every playing sound and returns each instance to the
-// context's pool.
-//
-// This is a part of the high level API, not the low level API, so the context
-// must have been created with a non-zero playing_pool_count. If using the low
-// level API you must write your own function to stop playing all sounds.
-void tsStopAllSounds(tsContext* ctx)
-{
-    // The pool must exist: stopped sounds are pushed onto its free list.
-    TS_ASSERT(ctx->playing_pool != 0);
-
-    // The playing list is shared with tsMix, which may run on another thread.
-    tsLock(ctx);
-
-    tsPlayingSound* sound = ctx->playing;
-    ctx->playing = 0;
-
-    while (sound)
-    {
-        tsPlayingSound* next = sound->next;
-
-        // Same clean-up tsMix performs when it removes a sound: free the
-        // pitch filters and mark the instance inactive before recycling it.
-        tsRemoveFilter(sound);
-        sound->active = 0;
-
-        sound->next = ctx->playing_free;
-        ctx->playing_free = sound;
-        sound = next;
-    }
-
-    tsUnlock(ctx);
-}
-
-#if TS_PLATFORM == TS_WINDOWS
-
-// Computes which region of the DirectSound buffer should be filled next.
-//
-// byte_to_lock   - receives the byte offset at which to start writing
-//                  (derived from ctx->running_index)
-// bytes_to_write - receives the number of bytes from there up to the target
-//                  cursor, which is the write cursor plus latency_samples,
-//                  rounded with TS_ALIGN(.., 16)
-//
-// If the buffer position cannot be queried both outputs are set to 0, which
-// makes tsMix skip this round instead of working from uninitialized cursors.
-static void tsPosition(tsContext* ctx, int* byte_to_lock, int* bytes_to_write)
-{
-    // compute bytes to be written to direct sound
-    DWORD play_cursor;
-    DWORD write_cursor;
-#ifdef __cplusplus
-    HRESULT hr = ctx->buffer->GetCurrentPosition(&play_cursor, &write_cursor);
-#else
-    HRESULT hr = ctx->buffer->lpVtbl->GetCurrentPosition(ctx->buffer, &play_cursor, &write_cursor);
-#endif
-    TS_ASSERT(hr == DS_OK);
-    if (hr != DS_OK)
-    {
-        *byte_to_lock = 0;
-        *bytes_to_write = 0;
-        return;
-    }
-
-    DWORD lock = (ctx->running_index * ctx->bps) % ctx->buffer_size;
-    DWORD target_cursor = (write_cursor + ctx->latency_samples * ctx->bps) % ctx->buffer_size;
-    target_cursor = (DWORD)TS_ALIGN(target_cursor, 16);
-    DWORD write;
-
-    if (lock > target_cursor)
-    {
-        // the region to fill wraps around the end of the buffer
-        write = (ctx->buffer_size - lock) + target_cursor;
-    }
-
-    else
-    {
-        write = target_cursor - lock;
-    }
-
-    *byte_to_lock = lock;
-    *bytes_to_write = write;
-}
-
-// Copies mixed 16-bit interleaved samples into the DirectSound buffer.
-//
-// samples        - source samples, two int16 values per stereo sample
-// byte_to_lock   - byte offset in the DirectSound buffer to start locking at
-// bytes_to_write - number of bytes to lock and fill
-//
-// The locked range may come back as two regions; both are filled in order.
-// ctx->running_index is advanced by the number of samples copied. If the lock
-// fails (after one Restore attempt on DSERR_BUFFERLOST) nothing is copied.
-static void tsMemcpyToDS(tsContext* ctx, int16_t* samples, int byte_to_lock, int bytes_to_write)
-{
- // copy mixer buffers to direct sound
- void* region1;
- DWORD size1;
- void* region2;
- DWORD size2;
-#ifdef __cplusplus
- HRESULT hr = ctx->buffer->Lock(byte_to_lock, bytes_to_write, &region1, &size1, &region2, &size2, 0);
-
- if (hr == DSERR_BUFFERLOST)
- {
- ctx->buffer->Restore();
- hr = ctx->buffer->Lock(byte_to_lock, bytes_to_write, &region1, &size1, &region2, &size2, 0);
- }
-#else
- HRESULT hr = ctx->buffer->lpVtbl->Lock(ctx->buffer, byte_to_lock, bytes_to_write, &region1, &size1, &region2, &size2, 0);
-
- if (hr == DSERR_BUFFERLOST)
- {
- ctx->buffer->lpVtbl->Restore(ctx->buffer);
- hr = ctx->buffer->lpVtbl->Lock(ctx->buffer, byte_to_lock, bytes_to_write, &region1, &size1, &region2, &size2, 0);
- }
-#endif
-
- if (!SUCCEEDED(hr))
- return;
-
- // first locked region
- unsigned running_index = ctx->running_index;
- INT16* sample1 = (INT16*)region1;
- DWORD sample1_count = size1 / ctx->bps;
- memcpy(sample1, samples, sample1_count * sizeof(INT16) * 2);
- samples += sample1_count * 2;
- running_index += sample1_count;
-
- // second locked region
- // NOTE(review): presumably size2 is 0 when the locked range did not wrap - confirm
- INT16* sample2 = (INT16*)region2;
- DWORD sample2_count = size2 / ctx->bps;
- memcpy(sample2, samples, sample2_count * sizeof(INT16) * 2);
- samples += sample2_count * 2;
- running_index += sample2_count;
-
-#ifdef __cplusplus
- ctx->buffer->Unlock(region1, size1, region2, size2);
-#else
- ctx->buffer->lpVtbl->Unlock(ctx->buffer, region1, size1, region2, size2);
-#endif
- ctx->running_index = running_index;
-
- // meager hack to fill out sound buffer before playing
- // NOTE(review): `first` is function-static, so it is shared by every
- // context; Play is only issued for whichever context gets here first.
- static int first;
- if (!first)
- {
-#ifdef __cplusplus
- ctx->buffer->Play(0, 0, DSBPLAY_LOOPING);
-#else
- ctx->buffer->lpVtbl->Play(ctx->buffer, 0, 0, DSBPLAY_LOOPING);
-#endif
- first = 1;
- }
-}
-
-#elif TS_PLATFORM == TS_MAC
-
-// CoreAudio render callback: fills the single output buffer from the sample
-// ring buffer and zeroes whatever part of it the ring buffer could not supply.
-// `udata` is the tsContext registered in tsMakeContext. Always returns noErr.
-static OSStatus tsMemcpyToCA(void* udata, AudioUnitRenderActionFlags* ioActionFlags, const AudioTimeStamp* inTimeStamp, UInt32 inBusNumber, UInt32 inNumberFrames, AudioBufferList* ioData)
-{
-    tsContext* ctx = (tsContext*)udata;
-    AudioBuffer* out = ioData->mBuffers;
-    int frames_requested = inNumberFrames;
-    int bytes_per_sample = ctx->bps;
-
-    TS_ASSERT(ioData->mNumberBuffers == 1);
-    TS_ASSERT(out->mNumberChannels == 2);
-
-    int total_bytes = out->mDataByteSize;
-    TS_ASSERT(total_bytes == frames_requested * bytes_per_sample);
-
-    int silent_bytes = tsPullBytes(ctx, out->mData, total_bytes);
-    char* silence_start = ((char*)out->mData) + (total_bytes - silent_bytes);
-    memset(silence_start, 0, silent_bytes);
-
-    return noErr;
-}
-
-#elif TS_PLATFORM == TS_SDL
-
-// SDL audio callback: fills `stream` from the sample ring buffer and zeroes
-// whatever part of it the ring buffer could not supply. `udata` is the
-// tsContext registered in tsMakeContext.
-static void tsSDL_AudioCallback(void* udata, Uint8* stream, int len)
-{
-    tsContext* ctx = (tsContext*)udata;
-    int silent_bytes = tsPullBytes(ctx, stream, len);
-    Uint8* silence_start = stream + (len - silent_bytes);
-    memset(silence_start, 0, silent_bytes);
-}
-
-#endif
-
-static void tsPitchShift(float pitchShift, int num_samples_to_process, float sampleRate, float* indata, tsPitchData** pitch_filter);
-
-// Pitch processing tunables
-#define TS_MAX_FRAME_LENGTH 4096
-#define TS_PITCH_FRAME_SIZE 512
-#define TS_PITCH_QUALITY 8
-
-// internals
-#define TS_STEPSIZE (TS_PITCH_FRAME_SIZE / TS_PITCH_QUALITY)
-#define TS_OVERLAP (TS_PITCH_FRAME_SIZE - TS_STEPSIZE)
-#define TS_EXPECTED_FREQUENCY (2.0f * 3.14159265359f * (float)TS_STEPSIZE / (float)TS_PITCH_FRAME_SIZE)
-
-// TODO:
-// Use a memory pool for these things. For now they are just malloc16'd/free16'd
-// Not high priority to use a pool, since pitch shifting is already really expensive,
-// and cost of malloc is dwarfed. But would be a nice-to-have for potential memory
-// fragmentation issues.
-// Per-channel working state for tsPitchShift. tsMix keeps one of these per
-// channel in tsPlayingSound::pitch_filter and reads the shifted audio back
-// from pitch_shifted_output_samples. Freed with free16 in tsRemoveFilter.
-typedef struct tsPitchData
-{
- // result of the last tsPitchShift call; TS_MAX_FRAME_LENGTH is the most
- // samples that can be shifted in one go
- float pitch_shifted_output_samples[TS_MAX_FRAME_LENGTH];
- float in_FIFO[TS_STEPSIZE + TS_PITCH_FRAME_SIZE];
- float out_FIFO[TS_STEPSIZE + TS_PITCH_FRAME_SIZE];
- float fft_data[2 * TS_PITCH_FRAME_SIZE];
- float previous_phase[TS_PITCH_FRAME_SIZE / 2 + 4];
- float sum_phase[TS_PITCH_FRAME_SIZE / 2 + 4];
- float window_accumulator[TS_STEPSIZE + TS_PITCH_FRAME_SIZE];
- float freq[TS_PITCH_FRAME_SIZE];
- float mag[TS_PITCH_FRAME_SIZE];
- float pitch_shift_workspace[TS_PITCH_FRAME_SIZE];
- int index;
-} tsPitchData;
-
-// Frees both per-channel pitch filters of `playing` (if allocated) and
-// resets the pointers to 0.
-static void tsRemoveFilter(tsPlayingSound* playing)
-{
-    for (int channel = 0; channel < 2; ++channel)
-    {
-        if (!playing->pitch_filter[channel]) continue;
-
-        free16(playing->pitch_filter[channel]);
-        playing->pitch_filter[channel] = 0;
-    }
-}
-
-// Mixes all playing sounds for the amount of audio the backend currently
-// wants and hands the result to the platform output.
-//
-// Steps: work out how many samples to produce (tsPosition on Windows,
-// tsSamplesToMix elsewhere), clear the float accumulators, add every playing
-// sound scaled by volume*pan (optionally pitch shifted), update the playing
-// list (looping, removal, returning pooled instances to the free list), then
-// convert to saturated 16-bit interleaved samples and pass them to
-// tsMemcpyToDS (Windows) or tsPushBytes (Mac/SDL).
-//
-// The whole function runs under tsLock/tsUnlock.
-void tsMix(tsContext* ctx)
-{
- tsLock(ctx);
-
-#if TS_PLATFORM == TS_WINDOWS
-
- int byte_to_lock;
- int bytes_to_write;
- tsPosition(ctx, &byte_to_lock, &bytes_to_write);
-
- if (!bytes_to_write) goto unlock;
- int samples_to_write = bytes_to_write / ctx->bps;
-
-#elif TS_PLATFORM == TS_MAC || TS_PLATFORM == TS_SDL
-
- int samples_to_write = tsSamplesToMix(ctx);
- if (!samples_to_write) goto unlock;
- int bytes_to_write = samples_to_write * ctx->bps;
-
-#else
-#endif
-
- // clear mixer buffers
- // (the mixer works on groups of 4 samples, so the count must be a multiple of 4)
- int wide_count = samples_to_write / 4;
- TS_ASSERT(!(samples_to_write & 3));
-
- __m128* floatA = ctx->floatA;
- __m128* floatB = ctx->floatB;
- __m128 zero = _mm_set1_ps(0.0f);
-
- for (int i = 0; i < wide_count; ++i)
- {
- floatA[i] = zero;
- floatB[i] = zero;
- }
-
- // mix all playing sounds into the mixer buffers
- // `ptr` points at the link that refers to the current sound, so a sound can
- // be unlinked by overwriting *ptr
- tsPlayingSound** ptr = &ctx->playing;
- while (*ptr)
- {
- tsPlayingSound* playing = *ptr;
- tsLoadedSound* loaded = playing->loaded_sound;
- __m128* cA = (__m128*)loaded->channels[0];
- __m128* cB = (__m128*)loaded->channels[1];
-
- // Attempted to play a sound with no audio.
- // Make sure the audio file was loaded properly. Check for
- // error messages in g_tsErrorReason.
- TS_ASSERT(cA);
-
- int mix_count = samples_to_write;
- int offset = playing->sample_index;
- int remaining = loaded->sample_count - offset;
- if (remaining < mix_count) mix_count = remaining;
- TS_ASSERT(remaining > 0);
-
- float vA0 = playing->volume0 * playing->pan0;
- float vB0 = playing->volume1 * playing->pan1;
- __m128 vA = _mm_set1_ps(vA0);
- __m128 vB = _mm_set1_ps(vB0);
-
- // skip sound if its delay is longer than mix_count and
- // handle various delay cases
- // (a negative sample_index means the sound is still waiting to start,
- // see tsSetDelay)
- int delay_offset = 0;
- if (offset < 0)
- {
- int samples_till_positive = -offset;
- int mix_leftover = mix_count - samples_till_positive;
-
- if (mix_leftover <= 0)
- {
- playing->sample_index += mix_count;
- goto get_next_playing_sound;
- }
-
- else
- {
- offset = 0;
- delay_offset = samples_till_positive;
- mix_count = mix_leftover;
- }
- }
- TS_ASSERT(!(delay_offset & 3));
-
- // immediately remove any inactive elements
- if (!playing->active || !ctx->running)
- goto remove;
-
- // skip all paused sounds
- if (playing->paused)
- goto get_next_playing_sound;
-
- // SIMD offsets
- int mix_wide = (int)TS_ALIGN(mix_count, 4) / 4;
- int offset_wide = (int)TS_TRUNC(offset, 4) / 4;
- int delay_wide = (int)TS_ALIGN(delay_offset, 4) / 4;
-
- // use tsPitchShift to on-the-fly pitch shift some samples
- // only call this function if the user set a custom pitch value
- if (playing->pitch != 1.0f)
- {
- int sample_count = (mix_wide - 2 * delay_wide) * 4;
- int falling_behind = sample_count > TS_MAX_FRAME_LENGTH;
-
- // TS_MAX_FRAME_LENGTH represents max samples we can pitch shift in one go. In the event
- // that this process takes longer than the time required to play the actual sound, just
- // fall back to the original sound (non-pitch shifted). This will sound very ugly. To
- // prevent falling behind, make sure not to pitch shift too many sounds at once. Try tweaking
- // TS_PITCH_QUALITY to make it lower (must be a power of 2).
- if (!falling_behind)
- {
- tsPitchShift(playing->pitch, sample_count, (float)ctx->Hz, (float*)(cA + delay_wide + offset_wide), playing->pitch_filter);
- cA = (__m128 *)playing->pitch_filter[0]->pitch_shifted_output_samples;
-
- if (loaded->channel_count == 2)
- {
- tsPitchShift(playing->pitch, sample_count, (float)ctx->Hz, (float*)(cB + delay_wide + offset_wide), playing->pitch_filter + 1);
- cB = (__m128 *)playing->pitch_filter[1]->pitch_shifted_output_samples;
- }
-
- // the shifted output starts at index 0, so cancel out the loop's
- // starting index (delay_wide) when indexing cA / cB below
- offset_wide = -delay_wide;
- }
- }
-
- // apply volume, load samples into float buffers
- switch (loaded->channel_count)
- {
- case 1:
- // mono: the single channel feeds both outputs
- for (int i = delay_wide; i < mix_wide - delay_wide; ++i)
- {
- __m128 A = cA[i + offset_wide];
- __m128 B = _mm_mul_ps(A, vB);
- A = _mm_mul_ps(A, vA);
- floatA[i] = _mm_add_ps(floatA[i], A);
- floatB[i] = _mm_add_ps(floatB[i], B);
- }
- break;
-
- case 2:
- {
- for (int i = delay_wide; i < mix_wide - delay_wide; ++i)
- {
- __m128 A = cA[i + offset_wide];
- __m128 B = cB[i + offset_wide];
-
- A = _mm_mul_ps(A, vA);
- B = _mm_mul_ps(B, vB);
- floatA[i] = _mm_add_ps(floatA[i], A);
- floatB[i] = _mm_add_ps(floatB[i], B);
- }
- } break;
- }
-
- // playing list logic
- playing->sample_index += mix_count;
- if (playing->sample_index == loaded->sample_count)
- {
- if (playing->looped)
- {
- playing->sample_index = 0;
- goto get_next_playing_sound;
- }
-
- remove:
- playing->sample_index = 0;
- *ptr = (*ptr)->next;
- playing->next = 0;
- playing->active = 0;
-
- tsRemoveFilter(playing);
-
- // if using high-level API manage the tsPlayingSound memory ourselves
- if (ctx->playing_pool)
- {
- playing->next = ctx->playing_free;
- ctx->playing_free = playing;
- }
-
- // we already incremented next pointer, so don't do it again
- continue;
- }
-
- get_next_playing_sound:
- if (*ptr) ptr = &(*ptr)->next;
- else break;
- }
-
- // load all floats into 16 bit packed interleaved samples
-#if TS_PLATFORM == TS_WINDOWS
-
- __m128i* samples = ctx->samples;
- for (int i = 0; i < wide_count; ++i)
- {
- __m128i a = _mm_cvtps_epi32(floatA[i]);
- __m128i b = _mm_cvtps_epi32(floatB[i]);
- __m128i a0b0a1b1 = _mm_unpacklo_epi32(a, b);
- __m128i a2b2a3b3 = _mm_unpackhi_epi32(a, b);
- samples[i] = _mm_packs_epi32(a0b0a1b1, a2b2a3b3);
- }
- tsMemcpyToDS(ctx, (int16_t*)samples, byte_to_lock, bytes_to_write);
-
-#elif TS_PLATFORM == TS_MAC || TS_PLATFORM == TS_SDL
-
- // Since the ctx->samples array is already in use as a ring buffer
- // reusing floatA to store output is a good way to temporarily store
- // the final samples. Then a single ring buffer push can be used
- // afterwards. Pretty hacky, but whatever :)
- __m128i* samples = (__m128i*)floatA;
- // NOTE(review): this memset zeroes the memory aliased by floatA before the
- // loop below reads floatA[i] - confirm this is intended
- memset(samples, 0, sizeof(__m128i) * wide_count);
- for (int i = 0; i < wide_count; ++i)
- {
- __m128i a = _mm_cvtps_epi32(floatA[i]);
- __m128i b = _mm_cvtps_epi32(floatB[i]);
- __m128i a0b0a1b1 = _mm_unpacklo_epi32(a, b);
- __m128i a2b2a3b3 = _mm_unpackhi_epi32(a, b);
- samples[i] = _mm_packs_epi32(a0b0a1b1, a2b2a3b3);
- }
- tsPushBytes(ctx, samples, bytes_to_write);
-
-#else
-#endif
-
-unlock:
- tsUnlock(ctx);
-}
-
-// TODO:
-// Try this optimization out (2N POINT REAL FFT USING AN N POINT COMPLEX FFT)
-// http://www.fftguru.com/fftguru.com.tutorial2.pdf
-
-#include <math.h>
-
-// Reverses the order of the 32 bits in x (bit 0 becomes bit 31 and so on) by
-// swapping neighbouring bits, then pairs, nibbles, bytes and finally halves.
-static uint32_t tsRev32(uint32_t x)
-{
-    x = ((x & 0xAAAAAAAA) >> 1) | ((x & 0x55555555) << 1);
-    x = ((x & 0xCCCCCCCC) >> 2) | ((x & 0x33333333) << 2);
-    x = ((x & 0xF0F0F0F0) >> 4) | ((x & 0x0F0F0F0F) << 4);
-    x = ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8);
-    x = (x >> 16) | (x << 16);
-    return x;
-}
-
-// Counts the set bits in x by summing adjacent bit fields of growing width
-// (2, 4, 8, 16, 32 bits); the result is in the range 0..32.
-static uint32_t tsPopCount(uint32_t x)
-{
-    x = x - ((x >> 1) & 0x55555555);
-    x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
-    x = ((x >> 4) + x) & 0x0F0F0F0F;
-    x += x >> 8;
-    x += x >> 16;
-    return x & 0x0000003F;
-}
-
-// Integer base-2 logarithm, rounded down (returns 0 for x == 0 or x == 1).
-// The highest set bit is smeared into every lower position; after dropping
-// one bit, the number of set bits equals the index of that highest bit.
-static uint32_t tsLog2(uint32_t x)
-{
-    x |= x >> 1;
-    x |= x >> 2;
-    x |= x >> 4;
-    x |= x >> 8;
-    x |= x >> 16;
-    return tsPopCount(x >> 1);
-}
-
-// In-place radix-2 complex FFT over `count` points.
-// x contains real inputs
-// y contains imaginary inputs
-// count must be a power of 2
-// sign must be 1.0 (forward transform) or -1.0f (inverse transform)
-// The forward transform (sign > 0) also scales the result by 1 / count.
-static void tsFFT(float* x, float* y, int count, float sign)
-{
- int exponent = (int)tsLog2((uint32_t)count);
-
- // bit reversal stage
- // swap all elements with their bit reversed index within the
- // lowest level of the Cooley-Tukey recursion tree
- for (int i = 1; i < count - 1; i++)
- {
- // reverse all 32 bits, then keep only the top `exponent` of them
- uint32_t j = tsRev32((uint32_t)i);
- j >>= (32 - exponent);
- // i < j so that each pair is swapped exactly once
- if (i < (int)j)
- {
- float tx = x[i];
- float ty = y[i];
- x[i] = x[j];
- y[i] = y[j];
- x[j] = tx;
- y[j] = ty;
- }
- }
-
- // for each recursive iteration
- // (Ls is the half-size and L the full size of the butterflies in this pass)
- for (int iter = 0, L = 1; iter < exponent; ++iter)
- {
- int Ls = L;
- L <<= 1;
- float ur = 1.0f; // cos( 0 )
- float ui = 0; // sin( 0 )
- float arg = 3.14159265359f / (float)Ls;
- float wr = cosf(arg);
- float wi = -sign * sinf(arg);
-
- // rows in DFT submatrix
- for (int j = 0; j < Ls; ++j)
- {
- // do butterflies upon DFT row elements
- for (int i = j; i < count; i += L)
- {
- int index = i + Ls;
- float x_index = x[index];
- float y_index = y[index];
- float x_i = x[i];
- float y_i = y[i];
-
- // (tr, ti) = (ur, ui) * (x[index], y[index]) as complex numbers
- float tr = ur * x_index - ui * y_index;
- float ti = ur * y_index + ui * x_index;
- float x_low = x_i - tr;
- float x_high = x_i + tr;
- float y_low = y_i - ti;
- float y_high = y_i + ti;
-
- x[index] = x_low;
- y[index] = y_low;
- x[i] = x_high;
- y[i] = y_high;
- }
-
- // Rotate ur and ui via Givens rotations (2d planar rotation).
- // This keeps cos/sin calls in the outermost loop.
- // Floating point error is scaled proportionally to Ls.
- float t = ur * wr - ui * wi;
- ui = ur * wi + ui * wr;
- ur = t;
- }
- }
-
- // scale factor for forward transform
- if (sign > 0)
- {
- float inv_count = 1.0f / (float)count;
- for (int i = 0; i < count; i++)
- {
- x[i] *= inv_count;
- y[i] *= inv_count;
- }
- }
-}
-
-// Compiler-specific spelling of "16-byte aligned" and "selectany".
-// TS_ALIGN16_0 goes before the declaration (MSVC __declspec form) and
-// TS_ALIGN16_1 after the declarator (GCC/Clang __attribute__ form); exactly
-// one of the two is non-empty for a given compiler.
-#ifdef _MSC_VER
-
-#define TS_ALIGN16_0 __declspec( align( 16 ) )
-#define TS_ALIGN16_1
-#define TS_SELECTANY extern const __declspec( selectany )
-
-#else
-
-#define TS_ALIGN16_0
-#define TS_ALIGN16_1 __attribute__( (aligned( 16 )) )
-#define TS_SELECTANY const __attribute__( (selectany) )
-
-#endif
-
-// SSE2 trig funcs from https://github.com/to-miz/sse_mathfun_extension/
-// Each macro below defines a 16-byte aligned array holding the same value
-// four times, so it can be loaded as one SSE register via a pointer cast.
-// _PS_CONST defines float _ps_<Name>[4].
-#define _PS_CONST( Name, Val ) \
- TS_SELECTANY TS_ALIGN16_0 float _ps_##Name[ 4 ] TS_ALIGN16_1 = { Val, Val, Val, Val }
-
-// _PS_CONST_TYPE defines Type _ps_<Name>[4] (used for integer bit masks that
-// are applied to float lanes).
-#define _PS_CONST_TYPE( Name, Type, Val ) \
- TS_SELECTANY TS_ALIGN16_0 Type _ps_##Name[ 4 ] TS_ALIGN16_1 = { Val, Val, Val, Val }
-
-// _PI32_CONST defines int _pi32_<Name>[4].
-#define _PI32_CONST( Name, Val ) \
- TS_SELECTANY TS_ALIGN16_0 int _pi32_##Name[ 4 ] TS_ALIGN16_1 = { Val, Val, Val, Val }
-
-// bit 31 of each lane (the IEEE float sign bit) and its complement
-_PS_CONST_TYPE(sign_mask, int, (int)0x80000000);
-_PS_CONST_TYPE(inv_sign_mask, int, (int)~0x80000000);
-
-// range-reduction thresholds and offsets used by _mm_atan_ps
-_PS_CONST(atanrange_hi, 2.414213562373095f);
-_PS_CONST(atanrange_lo, 0.4142135623730950f);
-_PS_CONST(cephes_PIO2F, 1.5707963267948966192f);
-_PS_CONST(cephes_PIO4F, 0.7853981633974483096f);
-_PS_CONST(1, 1.0f);
-_PS_CONST(0p5, 0.5f);
-_PS_CONST(0, 0);
-// polynomial coefficients (sine, arctangent)
-_PS_CONST(sincof_p0, -1.9515295891E-4f);
-_PS_CONST(sincof_p1, 8.3321608736E-3f);
-_PS_CONST(sincof_p2, -1.6666654611E-1f);
-_PS_CONST(atancof_p0, 8.05374449538e-2f);
-_PS_CONST(atancof_p1, 1.38776856032E-1f);
-_PS_CONST(atancof_p2, 1.99777106478E-1f);
-_PS_CONST(atancof_p3, 3.33329491539E-1f);
-_PS_CONST(cephes_PIF, 3.141592653589793238f);
-_PS_CONST(cephes_2PIF, 2.0f * 3.141592653589793238f);
-_PS_CONST(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
-_PS_CONST(minus_cephes_DP1, -0.78515625f);
-_PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4f);
-_PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8f);
-// polynomial coefficients (cosine)
-_PS_CONST(coscof_p0, 2.443315711809948E-005f);
-_PS_CONST(coscof_p1, -1.388731625493765E-003f);
-_PS_CONST(coscof_p2, 4.166664568298827E-002f);
-_PS_CONST(frame_size, (float)TS_PITCH_FRAME_SIZE);
-
-_PI32_CONST(1, 1);
-_PI32_CONST(inv1, ~1);
-_PI32_CONST(2, 2);
-_PI32_CONST(4, 4);
-
-// Computes an arctangent approximation for each of the four floats in x.
-//
-// Works on |x| and restores the sign at the end. Each lane is range-reduced
-// into one of three cases, selected branch-free with comparison masks:
-//   |x| > 2.414...            : evaluate at -1/|x|          and add pi/2
-//   0.414... < |x| <= 2.414...: evaluate at (|x|-1)/(|x|+1) and add pi/4
-//   otherwise                 : evaluate at |x|              and add 0
-// followed by a polynomial in the reduced argument.
-static __m128 _mm_atan_ps(__m128 x)
-{
- __m128 sign_bit, y;
-
- sign_bit = x;
- /* take the absolute value */
- x = _mm_and_ps(x, *(__m128*)_ps_inv_sign_mask);
- /* extract the sign bit (upper one) */
- sign_bit = _mm_and_ps(sign_bit, *(__m128*)_ps_sign_mask);
-
- /* range reduction, init x and y depending on range */
- /* x > 2.414213562373095 */
- __m128 cmp0 = _mm_cmpgt_ps(x, *(__m128*)_ps_atanrange_hi);
- /* x > 0.4142135623730950 */
- __m128 cmp1 = _mm_cmpgt_ps(x, *(__m128*)_ps_atanrange_lo);
-
- /* x > 0.4142135623730950 && !( x > 2.414213562373095 ) */
- __m128 cmp2 = _mm_andnot_ps(cmp0, cmp1);
-
- /* -( 1.0/x ) */
- __m128 y0 = _mm_and_ps(cmp0, *(__m128*)_ps_cephes_PIO2F);
- __m128 x0 = _mm_div_ps(*(__m128*)_ps_1, x);
- x0 = _mm_xor_ps(x0, *(__m128*)_ps_sign_mask);
-
- __m128 y1 = _mm_and_ps(cmp2, *(__m128*)_ps_cephes_PIO4F);
- /* (x-1.0)/(x+1.0) */
- __m128 x1_o = _mm_sub_ps(x, *(__m128*)_ps_1);
- __m128 x1_u = _mm_add_ps(x, *(__m128*)_ps_1);
- __m128 x1 = _mm_div_ps(x1_o, x1_u);
-
- /* per lane, select the reduced argument that matches its range */
- __m128 x2 = _mm_and_ps(cmp2, x1);
- x0 = _mm_and_ps(cmp0, x0);
- x2 = _mm_or_ps(x2, x0);
- cmp1 = _mm_or_ps(cmp0, cmp2);
- x2 = _mm_and_ps(cmp1, x2);
- x = _mm_andnot_ps(cmp1, x);
- x = _mm_or_ps(x2, x);
-
- /* per lane offset: pi/2, pi/4 or 0 */
- y = _mm_or_ps(y0, y1);
-
- /* polynomial in zz = x*x: (((p0*zz - p1)*zz + p2)*zz - p3)*zz*x + x */
- __m128 zz = _mm_mul_ps(x, x);
- __m128 acc = *(__m128*)_ps_atancof_p0;
- acc = _mm_mul_ps(acc, zz);
- acc = _mm_sub_ps(acc, *(__m128*)_ps_atancof_p1);
- acc = _mm_mul_ps(acc, zz);
- acc = _mm_add_ps(acc, *(__m128*)_ps_atancof_p2);
- acc = _mm_mul_ps(acc, zz);
- acc = _mm_sub_ps(acc, *(__m128*)_ps_atancof_p3);
- acc = _mm_mul_ps(acc, zz);
- acc = _mm_mul_ps(acc, x);
- acc = _mm_add_ps(acc, x);
- y = _mm_add_ps(y, acc);
-
- /* update the sign */
- y = _mm_xor_ps(y, sign_bit);
-
- return y;
-}
-
-/*
- * Four-lane atan2(y, x) built on top of _mm_atan_ps.
- *
- * Per lane:
- *   y == 0 and x >= 0 -> 0
- *   y == 0 and x <  0 -> pi
- *   x == 0 and y != 0 -> cephes_PIO2F carrying the sign of y
- *   otherwise         -> atan(y / x), shifted by +pi (y >= 0) or -pi (y < 0)
- *                        when x < 0
- *
- * The special cases are resolved with masks because y / x is evaluated for
- * every lane regardless: 0 / 0 yields NaN and y / 0 yields +/-inf, and those
- * values must not reach the result. Previously the (0, 0) lane was only
- * masked out of pio2_result (already zero there) and was also matched by the
- * "pi" case, so the NaN from _mm_atan_ps leaked into the returned value.
- */
-static __m128 _mm_atan2_ps(__m128 y, __m128 x)
-{
-    __m128 x_eq_0 = _mm_cmpeq_ps(x, *(__m128*)_ps_0);
-    __m128 x_gt_0 = _mm_cmpgt_ps(x, *(__m128*)_ps_0);
-    __m128 x_lt_0 = _mm_cmplt_ps(x, *(__m128*)_ps_0);
-    __m128 y_eq_0 = _mm_cmpeq_ps(y, *(__m128*)_ps_0);
-    __m128 y_lt_0 = _mm_cmplt_ps(y, *(__m128*)_ps_0);
-
-    /* lanes whose result is exactly 0: y == 0 and x >= 0 */
-    __m128 zero_mask = _mm_and_ps(x_eq_0, y_eq_0);
-    __m128 zero_mask_other_case = _mm_and_ps(y_eq_0, x_gt_0);
-    zero_mask = _mm_or_ps(zero_mask, zero_mask_other_case);
-
-    /* lanes on the y axis: x == 0 and y not equal to 0; the sign follows y */
-    __m128 pio2_mask = _mm_andnot_ps(y_eq_0, x_eq_0);
-    __m128 pio2_mask_sign = _mm_and_ps(y_lt_0, *(__m128*)_ps_sign_mask);
-    __m128 pio2_result = *(__m128*)_ps_cephes_PIO2F;
-    pio2_result = _mm_xor_ps(pio2_result, pio2_mask_sign);
-    pio2_result = _mm_and_ps(pio2_mask, pio2_result);
-
-    /* lanes on the negative x axis: y == 0 and x strictly below 0.
-       (x == 0 belongs to zero_mask, so it must not be matched here.) */
-    __m128 pi_mask = _mm_and_ps(y_eq_0, x_lt_0);
-    __m128 pi = *(__m128*)_ps_cephes_PIF;
-    __m128 pi_result = _mm_and_ps(pi_mask, pi);
-
-    /* quadrant correction for x < 0: +pi, or -pi when y < 0 as well */
-    __m128 swap_sign_mask_offset = _mm_and_ps(x_lt_0, y_lt_0);
-    swap_sign_mask_offset = _mm_and_ps(swap_sign_mask_offset, *(__m128*)_ps_sign_mask);
-    __m128 offset = _mm_xor_ps(*(__m128*)_ps_cephes_PIF, swap_sign_mask_offset);
-    offset = _mm_and_ps(x_lt_0, offset);
-
-    __m128 arg = _mm_div_ps(y, x);
-    __m128 atan_result = _mm_atan_ps(arg);
-    atan_result = _mm_add_ps(atan_result, offset);
-
-    /* select between zero_result, pio2_result and atan_result:
-       the general result is dropped in every special-case lane */
-    __m128 result = _mm_andnot_ps(zero_mask, pio2_result);
-    atan_result = _mm_andnot_ps(pio2_mask, atan_result);
-    atan_result = _mm_andnot_ps(zero_mask, atan_result);
-    result = _mm_or_ps(result, atan_result);
-    result = _mm_or_ps(result, pi_result);
-
-    return result;
-}
-
-static void _mm_sincos_ps(__m128 x, __m128 *s, __m128 *c) /* sine and cosine of the four floats in x; sines go to *s, cosines to *c */
-{
- __m128 xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y; /* xmm3 is overwritten before it is first read */
- __m128i emm0, emm2, emm4;
- sign_bit_sin = x;
- /* take the absolute value; the sign is re-applied to the sine at the end */
- x = _mm_and_ps(x, *(__m128*)_ps_inv_sign_mask);
- /* extract the sign bit (upper one) */
- sign_bit_sin = _mm_and_ps(sign_bit_sin, *(__m128*)_ps_sign_mask);
-
- /* scale by 4/Pi */
- y = _mm_mul_ps(x, *(__m128*)_ps_cephes_FOPI);
-
- /* store the integer part of y in emm2 (conversion truncates) */
- emm2 = _mm_cvttps_epi32(y);
-
- /* j=(j+1) & (~1) (see the cephes sources): round j up to an even value,
-    then convert it back to float for the range reduction below */
- emm2 = _mm_add_epi32(emm2, *(__m128i*)_pi32_1);
- emm2 = _mm_and_si128(emm2, *(__m128i*)_pi32_inv1);
- y = _mm_cvtepi32_ps(emm2);
-
- emm4 = emm2; /* keep a copy of j for the cosine sign computed further down */
-
- /* get the swap sign flag for the sine: bit 2 of j moved up to bit 31 */
- emm0 = _mm_and_si128(emm2, *(__m128i*)_pi32_4);
- emm0 = _mm_slli_epi32(emm0, 29);
- __m128 swap_sign_bit_sin = _mm_castsi128_ps(emm0);
-
- /* get the polynom selection mask for the sine: all ones where bit 1 of j is clear */
- emm2 = _mm_and_si128(emm2, *(__m128i*)_pi32_2);
- emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
- __m128 poly_mask = _mm_castsi128_ps(emm2);
-
- /* The magic pass: "Extended precision modular arithmetic"
- x = ((x - y * DP1) - y * DP2) - y * DP3;
- the constants are stored negated, hence the additions below */
- xmm1 = *(__m128*)_ps_minus_cephes_DP1;
- xmm2 = *(__m128*)_ps_minus_cephes_DP2;
- xmm3 = *(__m128*)_ps_minus_cephes_DP3;
- xmm1 = _mm_mul_ps(y, xmm1);
- xmm2 = _mm_mul_ps(y, xmm2);
- xmm3 = _mm_mul_ps(y, xmm3);
- x = _mm_add_ps(x, xmm1);
- x = _mm_add_ps(x, xmm2);
- x = _mm_add_ps(x, xmm3);
-
- emm4 = _mm_sub_epi32(emm4, *(__m128i*)_pi32_2); /* cosine sign: bit 2 of ~(j - 2) ... */
- emm4 = _mm_andnot_si128(emm4, *(__m128i*)_pi32_4);
- emm4 = _mm_slli_epi32(emm4, 29); /* ... moved up to bit 31 */
- __m128 sign_bit_cos = _mm_castsi128_ps(emm4);
-
- sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin); /* input sign combined with the swap flag */
-
-
- /* Evaluate the first polynom (the cosine approximation):
-    y = ((p0*z + p1)*z + p2)*z*z - 0.5*z + 1, with z = x*x */
- __m128 z = _mm_mul_ps(x, x);
- y = *(__m128*)_ps_coscof_p0;
-
- y = _mm_mul_ps(y, z);
- y = _mm_add_ps(y, *(__m128*)_ps_coscof_p1);
- y = _mm_mul_ps(y, z);
- y = _mm_add_ps(y, *(__m128*)_ps_coscof_p2);
- y = _mm_mul_ps(y, z);
- y = _mm_mul_ps(y, z);
- __m128 tmp = _mm_mul_ps(z, *(__m128*)_ps_0p5);
- y = _mm_sub_ps(y, tmp);
- y = _mm_add_ps(y, *(__m128*)_ps_1);
-
- /* Evaluate the second polynom (the sine approximation):
-    y2 = ((s0*z + s1)*z + s2)*z*x + x */
-
- __m128 y2 = *(__m128*)_ps_sincof_p0;
- y2 = _mm_mul_ps(y2, z);
- y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p1);
- y2 = _mm_mul_ps(y2, z);
- y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p2);
- y2 = _mm_mul_ps(y2, z);
- y2 = _mm_mul_ps(y2, x);
- y2 = _mm_add_ps(y2, x);
-
- /* select the correct result from the two polynoms: where poly_mask is set
-    the sine takes y2 and the cosine takes y, elsewhere the roles are swapped */
- xmm3 = poly_mask;
- __m128 ysin2 = _mm_and_ps(xmm3, y2);
- __m128 ysin1 = _mm_andnot_ps(xmm3, y);
- y2 = _mm_sub_ps(y2, ysin2); /* y2 now survives only where the mask is clear */
- y = _mm_sub_ps(y, ysin1); /* y now survives only where the mask is set */
-
- xmm1 = _mm_add_ps(ysin1, ysin2);
- xmm2 = _mm_add_ps(y, y2);
-
- /* update the sign */
- *s = _mm_xor_ps(xmm1, sign_bit_sin);
- *c = _mm_xor_ps(xmm2, sign_bit_cos);
-}
-
-/* Bitwise blend of two integer vectors: every bit that is set in mask is
-   taken from b, every other bit is taken from a. */
-static __m128i select_si(__m128i a, __m128i b, __m128i mask)
-{
-    __m128i differing_bits = _mm_xor_si128(b, a);
-    __m128i bits_to_flip = _mm_and_si128(mask, differing_bits);
-
-    /* flipping the masked differences turns a into b in exactly those bits */
-    return _mm_xor_si128(a, bits_to_flip);
-}
-
-#define tsVonHann( i ) (-0.5f * cosf( 2.0f * 3.14159265359f * (float)(i) / (float)TS_PITCH_FRAME_SIZE ) + 0.5f) // scalar Hann window value 0.5 - 0.5 * cos(2 * pi * i / TS_PITCH_FRAME_SIZE); tsVonHann4 is the four-lane form
-
-// Hann window values for the four consecutive samples i*4 .. i*4+3 of a
-// TS_PITCH_FRAME_SIZE-sample frame, lowest sample in the lowest lane:
-//     w(n) = -0.5 * cos(2 * pi * n / TS_PITCH_FRAME_SIZE) + 0.5
-static __m128 tsVonHann4(int i)
-{
-    const int first = i * 4;
-
-    __m128 angle = _mm_set_ps((float)(first + 3), (float)(first + 2), (float)(first + 1), (float)first);
-    angle = _mm_mul_ps(*(__m128*)_ps_cephes_2PIF, angle);
-    angle = _mm_div_ps(angle, *(__m128*)_ps_frame_size);
-
-    // The cosine is deliberately taken one lane at a time with cosf.
-    // Seems like _mm_cos_ps and _mm_sincos_ps was causing some audio popping
-    // here (reference: http://gruntthepeon.free.fr/ssemath/sse_mathfun.h).
-    // The guess is that some large negative or positive values were causing
-    // precision trouble. Four scalar cosines are not really a big deal,
-    // since this function is not a bottleneck.
-    __m128 cosines = angle;
-    float* lane = (float*)&cosines;
-    for (int n = 0; n < 4; ++n)
-        lane[n] = cosf(lane[n]);
-
-    return _mm_add_ps(_mm_mul_ps(_mm_set_ps1(-0.5f), cosines), _mm_set_ps1(0.5f));
-}
-
-// Analysis and synthesis steps learned from Bernsee's wonderful blog post:
-// http://blogs.zynaptiq.com/bernsee/pitch-shifting-using-the-ft/
-//
-// Runs num_samples_to_process samples from indata through the pitch shifter.
-//
-//   pitchShift             - factor applied to every analysed frequency
-//   num_samples_to_process - sample count; asserted <= TS_MAX_FRAME_LENGTH;
-//                            any count is accepted, it does not have to be a
-//                            multiple of 4
-//   sampleRate             - used to derive the frequency width of one FFT bin
-//   indata                 - input samples; only read here, scaled by 1/32768
-//                            after being copied into the internal FIFO
-//   pitch_filter           - persistent state; allocated and zeroed on the
-//                            first call, i.e. while *pitch_filter is NULL
-//
-// The result is NOT written back to indata: it is left in
-// (*pitch_filter)->pitch_shifted_output_samples, scaled back up by 32768.
-static void tsPitchShift(float pitchShift, int num_samples_to_process, float sampleRate, float* indata, tsPitchData** pitch_filter)
-{
-    TS_ASSERT(num_samples_to_process <= TS_MAX_FRAME_LENGTH);
-
-    // make sure compiler didn't do anything weird with the member
-    // offsets of tsPitchData. All arrays must be 16 byte aligned
-    TS_ASSERT(!((size_t)&(((tsPitchData*)0)->pitch_shifted_output_samples) & 15));
-    TS_ASSERT(!((size_t)&(((tsPitchData*)0)->fft_data) & 15));
-    TS_ASSERT(!((size_t)&(((tsPitchData*)0)->previous_phase) & 15));
-    TS_ASSERT(!((size_t)&(((tsPitchData*)0)->sum_phase) & 15));
-    TS_ASSERT(!((size_t)&(((tsPitchData*)0)->window_accumulator) & 15));
-    TS_ASSERT(!((size_t)&(((tsPitchData*)0)->freq) & 15));
-    TS_ASSERT(!((size_t)&(((tsPitchData*)0)->mag) & 15));
-    TS_ASSERT(!((size_t)&(((tsPitchData*)0)->pitch_shift_workspace) & 15));
-
-    tsPitchData* pf;
-
-    if (*pitch_filter == NULL)
-    {
-        // NOTE(review): the result of malloc16 is used unchecked -- confirm
-        // how allocation failure is supposed to be reported to the caller.
-        pf = (tsPitchData*)malloc16(sizeof(tsPitchData));
-        memset(pf, 0, sizeof(tsPitchData));
-        *pitch_filter = pf;
-    }
-    else
-    {
-        pf = *pitch_filter;
-    }
-
-    float freqPerBin = sampleRate / (float)TS_PITCH_FRAME_SIZE;
-    __m128 freq_per_bin = _mm_set_ps1(sampleRate / (float)TS_PITCH_FRAME_SIZE);
-    __m128 pi = *(__m128*)_ps_cephes_PIF;
-    __m128 two_pi = *(__m128*)_ps_cephes_2PIF;
-    __m128 pitch_quality = _mm_set_ps1((float)TS_PITCH_QUALITY);
-    __m128 int16_max = _mm_set_ps1(32768.0f);
-    float* out_samples = pf->pitch_shifted_output_samples;
-    if (pf->index == 0) pf->index = TS_OVERLAP;
-
-    while (num_samples_to_process > 0)
-    {
-        // take as many samples as still fit into the current frame
-        int copy_count = TS_PITCH_FRAME_SIZE - pf->index;
-        if (num_samples_to_process < copy_count) copy_count = num_samples_to_process;
-
-        float* fifo_in = pf->in_FIFO + pf->index;
-        memcpy(fifo_in, indata, sizeof(float) * copy_count);
-        memcpy(out_samples, pf->out_FIFO + pf->index - TS_OVERLAP, sizeof(float) * copy_count);
-
-        // Scale the fresh input down by 32768 and the outgoing samples up by
-        // 32768. Unaligned loads/stores plus a scalar tail keep this correct
-        // for every copy_count and every FIFO position. The previous
-        // head/body/tail split derived its counts and indices incorrectly
-        // whenever copy_count or pf->index was not a multiple of 4, which
-        // corrupted the sample counters.
-        int simd_end = copy_count & ~3;
-        int n = 0;
-        for (; n < simd_end; n += 4)
-        {
-            _mm_storeu_ps(fifo_in + n, _mm_div_ps(_mm_loadu_ps(fifo_in + n), int16_max));
-            _mm_storeu_ps(out_samples + n, _mm_mul_ps(_mm_loadu_ps(out_samples + n), int16_max));
-        }
-        for (; n < copy_count; ++n)
-        {
-            fifo_in[n] /= 32768.0f;
-            out_samples[n] *= 32768.0f;
-        }
-
-        num_samples_to_process -= copy_count;
-        pf->index += copy_count;
-        indata += copy_count;
-        out_samples += copy_count;
-
-        // a full frame has been collected: analyse, shift, resynthesize
-        if (pf->index >= TS_PITCH_FRAME_SIZE)
-        {
-            pf->index = TS_OVERLAP;
-            {
-                // window the frame into the real half of fft_data
-                __m128* fft_data = (__m128*)pf->fft_data;
-                __m128* in_FIFO = (__m128*)pf->in_FIFO;
-
-                for (int k = 0; k < TS_PITCH_FRAME_SIZE / 4; k++)
-                {
-                    __m128 von_hann = tsVonHann4(k);
-                    __m128 sample = in_FIFO[k];
-                    __m128 windowed_sample = _mm_mul_ps(sample, von_hann);
-                    fft_data[k] = windowed_sample;
-                }
-            }
-
-            // imaginary half starts at fft_data + TS_PITCH_FRAME_SIZE and is cleared
-            memset(pf->fft_data + TS_PITCH_FRAME_SIZE, 0, TS_PITCH_FRAME_SIZE * sizeof(float));
-            tsFFT(pf->fft_data, pf->fft_data + TS_PITCH_FRAME_SIZE, TS_PITCH_FRAME_SIZE, 1.0f);
-
-            {
-                // analysis: magnitude and estimated true frequency per bin
-                __m128* fft_data = (__m128*)pf->fft_data;
-                __m128* previous_phase = (__m128*)pf->previous_phase;
-                __m128* magnitudes = (__m128*)pf->mag;
-                __m128* frequencies = (__m128*)pf->freq;
-                int simd_count = (TS_PITCH_FRAME_SIZE / 2) / 4;
-
-                for (int k = 0; k <= simd_count; k++)
-                {
-                    __m128 real = fft_data[k];
-                    __m128 imag = fft_data[(TS_PITCH_FRAME_SIZE / 4) + k];
-                    __m128 overlap_phase = _mm_set_ps((float)(k * 4 + 3) * TS_EXPECTED_FREQUENCY, (float)(k * 4 + 2) * TS_EXPECTED_FREQUENCY, (float)(k * 4 + 1) * TS_EXPECTED_FREQUENCY, (float)(k * 4) * TS_EXPECTED_FREQUENCY);
-                    __m128 k4 = _mm_set_ps((float)(k * 4 + 3), (float)(k * 4 + 2), (float)(k * 4 + 1), (float)(k * 4));
-
-                    __m128 mag = _mm_mul_ps(_mm_set_ps1(2.0f), _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(real, real), _mm_mul_ps(imag, imag))));
-                    __m128 phase = _mm_atan2_ps(imag, real);
-                    __m128 phase_dif = _mm_sub_ps(phase, previous_phase[k]);
-
-                    previous_phase[k] = phase;
-                    phase_dif = _mm_sub_ps(phase_dif, overlap_phase);
-
-                    // map delta phase into +/- pi interval
-                    __m128i qpd = _mm_cvttps_epi32(_mm_div_ps(phase_dif, pi));
-                    __m128i zero = _mm_setzero_si128();
-                    __m128i ltzero_mask = _mm_cmplt_epi32(qpd, zero);
-                    __m128i ones_bit = _mm_and_si128(qpd, _mm_set1_epi32(1));
-                    __m128i neg_qpd = _mm_sub_epi32(qpd, ones_bit);
-                    __m128i pos_qpd = _mm_add_epi32(qpd, ones_bit);
-                    qpd = select_si(pos_qpd, neg_qpd, ltzero_mask);
-                    __m128 pi_range_offset = _mm_mul_ps(pi, _mm_cvtepi32_ps(qpd));
-                    phase_dif = _mm_sub_ps(phase_dif, pi_range_offset);
-
-                    __m128 deviation = _mm_div_ps(_mm_mul_ps(_mm_set_ps1((float)TS_PITCH_QUALITY), phase_dif), two_pi);
-                    __m128 true_freq_estimated = _mm_add_ps(_mm_mul_ps(k4, freq_per_bin), _mm_mul_ps(deviation, freq_per_bin));
-
-                    magnitudes[k] = mag;
-                    frequencies[k] = true_freq_estimated;
-                }
-            }
-
-            // actual pitch shifting work
-            // shift frequencies into workspace
-            // NOTE(review): only the first TS_PITCH_FRAME_SIZE / 2 floats are
-            // cleared, while the loop below can write index
-            // TS_PITCH_FRAME_SIZE / 2 and the synthesis loop reads a few
-            // entries past it -- confirm against the size of
-            // pitch_shift_workspace.
-            memset(pf->pitch_shift_workspace, 0, (TS_PITCH_FRAME_SIZE / 2) * sizeof(float));
-            for (int k = 0; k <= TS_PITCH_FRAME_SIZE / 2; k++)
-            {
-                int index = (int)(k * pitchShift);
-                if (index <= TS_PITCH_FRAME_SIZE / 2)
-                    pf->pitch_shift_workspace[index] = pf->freq[k] * pitchShift;
-            }
-
-            // swap buffers around to reuse old pf->freq buffer as the new workspace
-            float* frequencies = pf->pitch_shift_workspace;
-            float* pitch_shift_workspace = pf->freq;
-            float* magnitudes = pf->mag;
-
-            // shift magnitudes into workspace
-            memset(pitch_shift_workspace, 0, TS_PITCH_FRAME_SIZE * sizeof(float));
-            for (int k = 0; k <= TS_PITCH_FRAME_SIZE / 2; k++)
-            {
-                int index = (int)(k * pitchShift);
-                if (index <= TS_PITCH_FRAME_SIZE / 2)
-                    pitch_shift_workspace[index] += magnitudes[k];
-            }
-
-            // track where the shifted magnitudes are
-            magnitudes = pitch_shift_workspace;
-
-            {
-                // synthesis: rebuild real/imaginary parts from the shifted
-                // magnitudes and the accumulated phase
-                __m128* magnitudes4 = (__m128*)magnitudes;
-                __m128* frequencies4 = (__m128*)frequencies;
-                __m128* fft_data = (__m128*)pf->fft_data;
-                __m128* sum_phase = (__m128*)pf->sum_phase;
-                int simd_count = (TS_PITCH_FRAME_SIZE / 2) / 4;
-
-                for (int k = 0; k <= simd_count; k++)
-                {
-                    __m128 mag = magnitudes4[k];
-                    __m128 freq = frequencies4[k];
-                    __m128 freq_per_bin_k = _mm_set_ps((float)(k * 4 + 3) * freqPerBin, (float)(k * 4 + 2) * freqPerBin, (float)(k * 4 + 1) * freqPerBin, (float)(k * 4) * freqPerBin);
-
-                    freq = _mm_sub_ps(freq, freq_per_bin_k);
-                    freq = _mm_div_ps(freq, freq_per_bin);
-
-                    freq = _mm_mul_ps(two_pi, freq);
-                    freq = _mm_div_ps(freq, pitch_quality);
-
-                    __m128 overlap_phase = _mm_set_ps((float)(k * 4 + 3) * TS_EXPECTED_FREQUENCY, (float)(k * 4 + 2) * TS_EXPECTED_FREQUENCY, (float)(k * 4 + 1) * TS_EXPECTED_FREQUENCY, (float)(k * 4) * TS_EXPECTED_FREQUENCY);
-                    freq = _mm_add_ps(freq, overlap_phase);
-
-                    __m128 phase = sum_phase[k];
-                    phase = _mm_add_ps(phase, freq);
-                    sum_phase[k] = phase;
-
-                    __m128 c, s;
-                    _mm_sincos_ps(phase, &s, &c);
-                    __m128 real = _mm_mul_ps(mag, c);
-                    __m128 imag = _mm_mul_ps(mag, s);
-
-                    fft_data[k] = real;
-                    fft_data[(TS_PITCH_FRAME_SIZE / 4) + k] = imag;
-                }
-            }
-
-            // NOTE(review): with the split layout used in this function (real
-            // parts from fft_data[0], imaginary parts from
-            // fft_data[TS_PITCH_FRAME_SIZE]) this range clears imaginary
-            // entries 2 .. TS_PITCH_FRAME_SIZE - 3, including values the
-            // loop above has just written, and leaves the upper real entries
-            // untouched. It looks like it was meant to clear the
-            // negative-frequency bins -- confirm against tsFFT before
-            // changing; behaviour is left as it was.
-            for (int k = TS_PITCH_FRAME_SIZE + 2; k < 2 * TS_PITCH_FRAME_SIZE - 2; ++k)
-                pf->fft_data[k] = 0;
-
-            tsFFT(pf->fft_data, pf->fft_data + TS_PITCH_FRAME_SIZE, TS_PITCH_FRAME_SIZE, -1);
-
-            {
-                // window the resynthesized frame and overlap-add it
-                __m128* fft_data = (__m128*)pf->fft_data;
-                __m128* window_accumulator = (__m128*)pf->window_accumulator;
-
-                for (int k = 0; k < TS_PITCH_FRAME_SIZE / 4; ++k)
-                {
-                    __m128 von_hann = tsVonHann4(k);
-                    __m128 fft_data_segment = fft_data[k];
-                    __m128 accumulator_segment = window_accumulator[k];
-                    __m128 divisor = _mm_div_ps(pitch_quality, _mm_set_ps1(8.0f));
-                    fft_data_segment = _mm_mul_ps(von_hann, fft_data_segment);
-                    fft_data_segment = _mm_div_ps(fft_data_segment, divisor);
-                    accumulator_segment = _mm_add_ps(accumulator_segment, fft_data_segment);
-                    window_accumulator[k] = accumulator_segment;
-                }
-            }
-
-            // hand the finished step to the output FIFO, then slide the
-            // accumulator and the input FIFO forward by one step
-            memcpy(pf->out_FIFO, pf->window_accumulator, TS_STEPSIZE * sizeof(float));
-            memmove(pf->window_accumulator, pf->window_accumulator + TS_STEPSIZE, TS_PITCH_FRAME_SIZE * sizeof(float));
-            memmove(pf->in_FIFO, pf->in_FIFO + TS_STEPSIZE, TS_OVERLAP * sizeof(float));
-        }
-    }
-}
-
-/*
-zlib license:
-
-Copyright (c) 2017 Randy Gaul http://www.randygaul.net
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from
-the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-1. The origin of this software must not be misrepresented; you must not
-claim that you wrote the original software. If you use this software
-in a product, an acknowledgment in the product documentation would be
-appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not
-be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#endif