/* Analyseur de MUsique et ENtraƮnement au CHAnt This file is released under either of the two licenses below, your choice: - LGPL v2.1 or later, https://www.gnu.org The GNU Lesser General Public Licence, version 2.1 or, at your option, any later version. - CeCILL-C, http://www.cecill.info The CeCILL-C license is more adapted to the French laws, but can be converted to the GNU LGPL. You can use, modify and/or redistribute the software under the terms of any of these licences, which should have been provided to you together with this sofware. If that is not the case, you can find a copy of the licences on the indicated web sites. By Nicolas . Brodu @ Inria . fr See http://nicolas.brodu.net/programmation/amuencha/ for more information */ #include #include #include #include #include #include #include #include "sse_mathfun.h" #include "AmuenchaWorker.hpp" using namespace std; using namespace boost::math::float_constants; Amuencha::Model::worker::worker() : sampling_rate{} , min_midi_note{24} , max_midi_note{72} , periods{20} , max_buffer_duration{500} { status = NO_DATA; } Amuencha::Model::worker::~worker() { mutex.lock(); status = QUIT_NOW; // Do not even wait the end of a cycle, quit condition.notify_one(); mutex.unlock(); // wait(CYCLE_PERIOD * 2); terminate(); // wait(); // until run terminates } void Amuencha::Model::worker::new_data(float* chunk, int size) { // producer, called from another thread. // Must not wake the main thread uselessly to notify that new data is available // The idea is that the new_data may be called very fast from a RT audio thread // but that frequencies are computed at most 10 times/second (or whatever the cyclic period is set) // => decouple thread frequencies // And when no data is here (recording off, no song...), the main thread is fully passive (no CPU hog) mutex.lock(); // Store a pointer here and not a full data copy, which is much faster // The real data is stored in the AudioRecording object // Kind of duplicates the AudioRecording list, however: // - This way, there is no need for mutex/lock in the AudioRecording structure // - The position of the last unprocessed chunk within that structure need not be stored there chunks.emplace_back(make_pair(chunk, size)); // set the flag that data has arrived status = HAS_NEW_DATA; // IF AND ONLY IF the thread was blocked forever, then wake it up if (waiting_time != CYCLE_PERIOD) { // and now resume the cyclic scheduling waiting_time = CYCLE_PERIOD; condition.wakeOne(); } // Otherwise, do NOT wake the other thread, keep the low-freq cycle to decrease load mutex.unlock(); } void Amuencha::Model::worker::set_min_max_notes(int min_midi_note, int max_midi_note) { // Block data processing while changing the data structures data_mutex.lock(); if (max_midi_note < min_midi_note) std::swap(min_midi_note, max_midi_note); this->min_midi_note = std::clamp(min_midi_note, 0, 127); this->max_midi_note = std::clamp(max_midi_note, 0, 127); setup(); // Processing can resume with the new data structures in place. data_mutex.unlock(); } void Amuencha::Model::worker::set_periods(int periods) { // Block data processing while changing the data structures data_mutex.lock(); this->periods = std::clamp(periods, 1, 99); setup(); // Processing can resume with the new data structures in place. data_mutex.unlock(); } void Amuencha::Model::worker::setup(float sampling_rate, PowerHandler&& handler, int min_midi_note, int max_midi_note, int periods, int max_buffer_duration) { // Block data processing while changing the data structures data_mutex.lock(); this->sampling_rate = sampling_rate; this->samplerate_div_2pi = sampling_rate/two_pi; power_handler = std::move(handler); if (max_midi_note < min_midi_note) std::swap(min_midi_note, max_midi_note); this->min_midi_note = std::clamp(min_midi_note, 0, 127); this->max_midi_note = std::clamp(max_midi_note, 0, 127); this->periods = std::clamp(periods, 1, 99); this->max_buffer_duration = max_buffer_duration; setup(); // Processing can resume with the new data structures in place. data_mutex.unlock(); } void Amuencha::Model::worker::setup() { // Start with A440, but this could be parametrizable as well const float fref = 440; const float log2_fref = log2(fref); const int aref = 69; // use the midi numbering scheme, because why not float log2_fmin = (min_midi_note - aref) / 12. + log2_fref; float log2_fmax = (max_midi_note - aref) / 12. + log2_fref; int num_bins = max_midi_note - min_midi_note; frequencies.resize(num_bins); for (int b{0}; b < num_bins; ++b) { float bratio = (float)b / (num_bins - 1.); frequencies[b] = exp2(log2_fmin + (log2_fmax - log2_fmin) * bratio); } this->reassigned_frequencies = frequencies; this->power_spectrum.resize(frequencies.size()); // Prepare the windows //std::vector> windowed_sines; windowed_sines.resize(frequencies.size()); power_normalization_factors.resize(frequencies.size()); int big_buffer_size = 0; for (int idx{0}; idx < frequencies.size(); ++idx) { // for each freq, span at least 20 periods for more precise measurements // This still gives reasonable latencies, e.g. 50ms at 400Hz, 100ms at 200Hz, 400ms at 50Hz... // Could also span more for even better measurements, with larger // computation cost and latency float f = frequencies[idx]; int window_size = (int)(min(periods / f, max_buffer_duration * 0.001f) * sampling_rate); vector window(window_size); vector window_deriv(window_size); if (!read_from_cache(window, window_deriv)) { initialize_window(window); initialize_window_deriv(window_deriv); write_to_cache(window, window_deriv); } windowed_sines[idx].resize(window_size); float wsum = 0; for (int i{0}; i < window_size;) { if (i < window_size - 4) { v4sf tfs = { (float)(i - window_size - 1) / sampling_rate, (float)(i + 1 - window_size - 1) / sampling_rate, (float)(i + 2 - window_size - 1) / sampling_rate, (float)(i + 3 - window_size - 1) / sampling_rate }; tfs *= (float)(-two_pi * f); v4sf sin_tf, cos_tf; sincos_ps(tfs, &sin_tf, &cos_tf); for (int j{0}; j < 3; ++j) { v4sf ws = { cos_tf[j] * window[i+j], sin_tf[j] * window[i+j], cos_tf[j] * window_deriv[i+j], sin_tf[j] * window_deriv[i+j] }; windowed_sines[idx][i + j] = ws; wsum += window[i+j]; } i += 4; continue; } float t = (float)(i - window_size - 1) / sampling_rate; float re = cosf(-two_pi * t * f); float im = sinf(-two_pi * t * f); v4sf ws = { re * window[i], im * window[i], re * window_deriv[i], im * window_deriv[i] }; windowed_sines[idx][i] = ws; wsum += window[i]; ++i; } power_normalization_factors[idx] = 1. / (wsum * wsum); big_buffer_size = max(big_buffer_size, window_size); } big_buffer.clear(); // fill with 0 signal content to start with big_buffer.resize(big_buffer_size, 0.f); } void Amuencha::Model::worker::work() { // Solution with a wait condition + time // other possible solution = timer, but that would need to be stopped mutex.lock(); // waiting_time is a mutex-protected info waiting_time = CYCLE_PERIOD; // loop starts with mutex locked while (true) { condition.wait_for(&mutex, waiting_time); if (status == QUIT_NOW) break; if (status == HAS_NEW_DATA) { // Swap the chunks to a local variable, so: // - the class chunks becomes empty // - the audio thread can feed it more data while computing frequencies vector> chunks; chunks.swap(this->chunks); status = NO_DATA; // will be updated if new data indeed arrives mutex.unlock(); // Now, we can take the time to do the frequency computations data_mutex.lock(); int new_data_size = 0; for (auto c : chunks) new_data_size += c.second; // shift the old data to make room for the new int new_data_pos = big_buffer.size() - new_data_size; //cout << "data " << new_data_size << " " << big_buffer.size() << endl; // std::copy can cope with overlapping regions in this copy direction if (new_data_pos > 0) copy(big_buffer.begin() + new_data_size, big_buffer.end(), big_buffer.begin()); // now copy each chunk at its position in the big buffer for (auto c : chunks) { if (new_data_pos < 0) { // discard too old chunks if (c.second <= -new_data_pos) { new_data_pos += c.second; continue; } // partial copy of chunks that fall on the edge copy(c.first + new_data_pos, c.first+c.second, &big_buffer[0]); new_data_pos = c.second + new_data_pos; continue; } copy(c.first, c.first+c.second, &big_buffer[new_data_pos]); new_data_pos += c.second; } // Apply the filter bank float *bbend = &big_buffer[0] + big_buffer.size(); for (int idx{0}; idx < frequencies.size(); ++idx) { const auto& ws = windowed_sines[idx]; int wsize = ws.size(); float* sig = bbend - wsize; v4sf acc = {0.f, 0.f, 0.f, 0.f}; for (int i{0}; i < wsize; ++i) acc += ws[i] * sig[i]; float norm = acc[0] * acc[0] + acc[1] * acc[1]; float reassign = frequencies[idx]; if (norm > 0) reassign -= (acc[0] * acc[3] - acc[1] * acc[2]) * samplerate_div_2pi / norm; reassigned_frequencies[idx] = reassign; power_spectrum[idx] = norm * power_normalization_factors[idx]; } // Notify our listener that new power/frequency content has arrived power_handler(reassigned_frequencies, power_spectrum); // setup can now lock and change data structures if needed data_mutex.unlock(); // relock for the condition wait mutex.lock(); continue; } // No more data ? Force waiting until data arrives waiting_time = ULONG_MAX; // keep the lock for next loop } mutex.unlock(); } void Amuencha::Model::worker::invalidate_samples() { mutex.lock(); data_mutex.lock(); chunks.clear(); data_mutex.unlock(); mutex.unlock(); } void Amuencha::Model::worker::initialize_window(std::vector& window) { // Kaiser window with a parameter of alpha=3 that nullifies the window on edges int size = window.size(); const float two_over_N = 2./size; const float alpha = 3.; const float alpha_pi = alpha * pi; const float inv_denom = 1. /boost::math::cyl_bessel_i(0., alpha_pi); for (int i{0}; i < size; ++i) { float p = i * two_over_N - 1.; window[i] = boost::math::cyl_bessel_i(0., alpha_pi * sqrt(1. - p * p)) * inv_denom; } } void Amuencha::Model::worker::initialize_window_deriv(std::vector& window) { // Derivative of the Kaiser window with a parameter of alpha=3 that nullifies the window on edges int size = window.size(); const float two_over_N = 2./size; const float alpha = 3.; const float alpha_pi = alpha * pi; const float inv_denom = 1./boost::math::cyl_bessel_i(0., alpha_pi); for (int i{1}; i0 = 1/2 window[0] = 0.5 * inv_denom * alpha_pi * alpha_pi * two_over_N; } bool Amuencha::Model::worker::read_from_cache(std::vector& window, std::vector& window_deriv) { auto it = mem_win_cache.find(window.size()); if (it != mem_win_cache.end()) { window = it->second; auto itd = mem_winderiv_cache.find(window.size()); if (itd != mem_winderiv_cache.end()) { window_deriv = itd->second; return true; } // else, load from disk } // TODO: make the cache location parametrizable (and an option to not use it) ifstream file(".amuencha_cache/w" + to_string(window.size()) + ".bin", ios::binary); if (file.is_open()) { file.read(reinterpret_cast(&window[0]), window.size() * sizeof(float)); file.read(reinterpret_cast(&window_deriv[0]), window_deriv.size() * sizeof(float)); if (file.tellg() != (window.size() + window_deriv.size()) * sizeof(float)) { cerr << "Error: invalid cache .amuencha_cache/w" + to_string(window.size()) + ".bin \n"; return false; } return true; } return false; } void Amuencha::Model::worker::write_to_cache(std::vector& window, std::vector& window_deriv) { #if defined(_WIN32) || defined(_WIN64) mkdir(".amuencha_cache"); #else mkdir(".amuencha_cache", 0755); #endif ofstream file(".amuencha_cache/w" + to_string(window.size()) + ".bin", ios::binary | ios::trunc); file.write(reinterpret_cast(&window[0]), window.size() * sizeof(float)); file.write(reinterpret_cast(&window_deriv[0]), window_deriv.size() * sizeof(float)); mem_win_cache[window.size()] = window; mem_winderiv_cache[window.size()] = window_deriv; }