score-avnd-amuencha/Amuencha/AmuenchaWorker.cpp

428 lines
14 KiB
C++

/*
Analyseur de MUsique et ENtraînement au CHAnt
This file is released under either of the two licenses below, your choice:
- LGPL v2.1 or later, https://www.gnu.org
The GNU Lesser General Public Licence, version 2.1 or,
at your option, any later version.
- CeCILL-C, http://www.cecill.info
The CeCILL-C license is more adapted to the French laws,
but can be converted to the GNU LGPL.
You can use, modify and/or redistribute the software under the terms of any
of these licences, which should have been provided to you together with this
sofware. If that is not the case, you can find a copy of the licences on
the indicated web sites.
By Nicolas . Brodu @ Inria . fr
See http://nicolas.brodu.net/programmation/amuencha/ for more information
*/
#include <iostream>
#include <fstream>
#include <algorithm>
#include <cmath>
#include <boost/math/special_functions/bessel.hpp>
#include <boost/math/constants/constants.hpp>
#include <sys/stat.h>
#include "sse_mathfun.h"
#include "AmuenchaWorker.hpp"
using namespace std;
using namespace boost::math::float_constants;
Amuencha::Model::worker::worker()
: sampling_rate{}
, min_midi_note{24}
, max_midi_note{72}
, periods{20}
, max_buffer_duration{500}
{
status = NO_DATA;
}
Amuencha::Model::worker::~worker()
{
mutex.lock();
status = QUIT_NOW;
// Do not even wait the end of a cycle, quit
condition.notify_one();
mutex.unlock();
// wait(CYCLE_PERIOD * 2);
terminate();
// wait(); // until run terminates
}
void Amuencha::Model::worker::new_data(float* chunk, int size)
{
// producer, called from another thread.
// Must not wake the main thread uselessly to notify that new data is available
// The idea is that the new_data may be called very fast from a RT audio thread
// but that frequencies are computed at most 10 times/second (or whatever the cyclic period is set)
// => decouple thread frequencies
// And when no data is here (recording off, no song...), the main thread is fully passive (no CPU hog)
mutex.lock();
// Store a pointer here and not a full data copy, which is much faster
// The real data is stored in the AudioRecording object
// Kind of duplicates the AudioRecording list, however:
// - This way, there is no need for mutex/lock in the AudioRecording structure
// - The position of the last unprocessed chunk within that structure need not be stored there
chunks.emplace_back(make_pair(chunk, size));
// set the flag that data has arrived
status = HAS_NEW_DATA;
// IF AND ONLY IF the thread was blocked forever, then wake it up
if (waiting_time != CYCLE_PERIOD) {
// and now resume the cyclic scheduling
waiting_time = CYCLE_PERIOD;
condition.wakeOne();
}
// Otherwise, do NOT wake the other thread, keep the low-freq cycle to decrease load
mutex.unlock();
}
void Amuencha::Model::worker::set_min_max_notes(int min_midi_note, int max_midi_note)
{
// Block data processing while changing the data structures
data_mutex.lock();
if (max_midi_note < min_midi_note) std::swap(min_midi_note, max_midi_note);
this->min_midi_note = std::clamp(min_midi_note, 0, 127);
this->max_midi_note = std::clamp(max_midi_note, 0, 127);
setup();
// Processing can resume with the new data structures in place.
data_mutex.unlock();
}
void Amuencha::Model::worker::set_periods(int periods)
{
// Block data processing while changing the data structures
data_mutex.lock();
this->periods = std::clamp(periods, 1, 99);
setup();
// Processing can resume with the new data structures in place.
data_mutex.unlock();
}
void Amuencha::Model::worker::setup(float sampling_rate,
PowerHandler&& handler,
int min_midi_note,
int max_midi_note,
int periods,
int max_buffer_duration)
{
// Block data processing while changing the data structures
data_mutex.lock();
this->sampling_rate = sampling_rate;
this->samplerate_div_2pi = sampling_rate/two_pi;
power_handler = std::move(handler);
if (max_midi_note < min_midi_note) std::swap(min_midi_note, max_midi_note);
this->min_midi_note = std::clamp(min_midi_note, 0, 127);
this->max_midi_note = std::clamp(max_midi_note, 0, 127);
this->periods = std::clamp(periods, 1, 99);
this->max_buffer_duration = max_buffer_duration;
setup();
// Processing can resume with the new data structures in place.
data_mutex.unlock();
}
void Amuencha::Model::worker::setup()
{
// Start with A440, but this could be parametrizable as well
const float fref = 440;
const float log2_fref = log2(fref);
const int aref = 69; // use the midi numbering scheme, because why not
float log2_fmin = (min_midi_note - aref) / 12. + log2_fref;
float log2_fmax = (max_midi_note - aref) / 12. + log2_fref;
int num_bins = max_midi_note - min_midi_note;
frequencies.resize(num_bins);
for (int b{0}; b < num_bins; ++b)
{
float bratio = (float)b / (num_bins - 1.);
frequencies[b] = exp2(log2_fmin + (log2_fmax - log2_fmin) * bratio);
}
this->reassigned_frequencies = frequencies;
this->power_spectrum.resize(frequencies.size());
// Prepare the windows
//std::vector<std::vector<v4sf>> windowed_sines;
windowed_sines.resize(frequencies.size());
power_normalization_factors.resize(frequencies.size());
int big_buffer_size = 0;
for (int idx{0}; idx < frequencies.size(); ++idx)
{
// for each freq, span at least 20 periods for more precise measurements
// This still gives reasonable latencies, e.g. 50ms at 400Hz, 100ms at 200Hz, 400ms at 50Hz...
// Could also span more for even better measurements, with larger
// computation cost and latency
float f = frequencies[idx];
int window_size = (int)(min(periods / f, max_buffer_duration * 0.001f) * sampling_rate);
vector<float> window(window_size);
vector<float> window_deriv(window_size);
if (!read_from_cache(window, window_deriv))
{
initialize_window(window);
initialize_window_deriv(window_deriv);
write_to_cache(window, window_deriv);
}
windowed_sines[idx].resize(window_size);
float wsum = 0;
for (int i{0}; i < window_size;)
{
if (i < window_size - 4)
{
v4sf tfs = {
(float)(i - window_size - 1) / sampling_rate,
(float)(i + 1 - window_size - 1) / sampling_rate,
(float)(i + 2 - window_size - 1) / sampling_rate,
(float)(i + 3 - window_size - 1) / sampling_rate
};
tfs *= (float)(-two_pi * f);
v4sf sin_tf, cos_tf;
sincos_ps(tfs, &sin_tf, &cos_tf);
for (int j{0}; j < 3; ++j)
{
v4sf ws = {
cos_tf[j] * window[i+j],
sin_tf[j] * window[i+j],
cos_tf[j] * window_deriv[i+j],
sin_tf[j] * window_deriv[i+j]
};
windowed_sines[idx][i + j] = ws;
wsum += window[i+j];
}
i += 4;
continue;
}
float t = (float)(i - window_size - 1) / sampling_rate;
float re = cosf(-two_pi * t * f);
float im = sinf(-two_pi * t * f);
v4sf ws = {
re * window[i],
im * window[i],
re * window_deriv[i],
im * window_deriv[i]
};
windowed_sines[idx][i] = ws;
wsum += window[i];
++i;
}
power_normalization_factors[idx] = 1. / (wsum * wsum);
big_buffer_size = max(big_buffer_size, window_size);
}
big_buffer.clear();
// fill with 0 signal content to start with
big_buffer.resize(big_buffer_size, 0.f);
}
void Amuencha::Model::worker::work()
{
// Solution with a wait condition + time
// other possible solution = timer, but that would need to be stopped
mutex.lock();
// waiting_time is a mutex-protected info
waiting_time = CYCLE_PERIOD;
// loop starts with mutex locked
while (true)
{
condition.wait_for(&mutex, waiting_time);
if (status == QUIT_NOW) break;
if (status == HAS_NEW_DATA)
{
// Swap the chunks to a local variable, so:
// - the class chunks becomes empty
// - the audio thread can feed it more data while computing frequencies
vector<pair<float*, int>> chunks;
chunks.swap(this->chunks);
status = NO_DATA; // will be updated if new data indeed arrives
mutex.unlock();
// Now, we can take the time to do the frequency computations
data_mutex.lock();
int new_data_size = 0;
for (auto c : chunks) new_data_size += c.second;
// shift the old data to make room for the new
int new_data_pos = big_buffer.size() - new_data_size;
//cout << "data " << new_data_size << " " << big_buffer.size() << endl;
// std::copy can cope with overlapping regions in this copy direction
if (new_data_pos > 0)
copy(big_buffer.begin() + new_data_size, big_buffer.end(), big_buffer.begin());
// now copy each chunk at its position in the big buffer
for (auto c : chunks)
{
if (new_data_pos < 0)
{
// discard too old chunks
if (c.second <= -new_data_pos)
{
new_data_pos += c.second;
continue;
}
// partial copy of chunks that fall on the edge
copy(c.first + new_data_pos, c.first+c.second, &big_buffer[0]);
new_data_pos = c.second + new_data_pos;
continue;
}
copy(c.first, c.first+c.second, &big_buffer[new_data_pos]);
new_data_pos += c.second;
}
// Apply the filter bank
float *bbend = &big_buffer[0] + big_buffer.size();
for (int idx{0}; idx < frequencies.size(); ++idx)
{
const auto& ws = windowed_sines[idx];
int wsize = ws.size();
float* sig = bbend - wsize;
v4sf acc = {0.f, 0.f, 0.f, 0.f};
for (int i{0}; i < wsize; ++i) acc += ws[i] * sig[i];
float norm = acc[0] * acc[0] + acc[1] * acc[1];
float reassign = frequencies[idx];
if (norm > 0)
reassign -= (acc[0] * acc[3] - acc[1] * acc[2]) * samplerate_div_2pi / norm;
reassigned_frequencies[idx] = reassign;
power_spectrum[idx] = norm * power_normalization_factors[idx];
}
// Notify our listener that new power/frequency content has arrived
power_handler(reassigned_frequencies, power_spectrum);
// setup can now lock and change data structures if needed
data_mutex.unlock();
// relock for the condition wait
mutex.lock();
continue;
}
// No more data ? Force waiting until data arrives
waiting_time = ULONG_MAX;
// keep the lock for next loop
}
mutex.unlock();
}
void Amuencha::Model::worker::invalidate_samples()
{
mutex.lock();
data_mutex.lock();
chunks.clear();
data_mutex.unlock();
mutex.unlock();
}
void Amuencha::Model::worker::initialize_window(std::vector<float>& window) {
// Kaiser window with a parameter of alpha=3 that nullifies the window on edges
int size = window.size();
const float two_over_N = 2./size;
const float alpha = 3.;
const float alpha_pi = alpha * pi;
const float inv_denom = 1. /boost::math::cyl_bessel_i(0., alpha_pi);
for (int i{0}; i < size; ++i)
{
float p = i * two_over_N - 1.;
window[i] = boost::math::cyl_bessel_i(0., alpha_pi * sqrt(1. - p * p)) * inv_denom;
}
}
void Amuencha::Model::worker::initialize_window_deriv(std::vector<float>& window) {
// Derivative of the Kaiser window with a parameter of alpha=3 that nullifies the window on edges
int size = window.size();
const float two_over_N = 2./size;
const float alpha = 3.;
const float alpha_pi = alpha * pi;
const float inv_denom = 1./boost::math::cyl_bessel_i(0., alpha_pi);
for (int i{1}; i<size; ++i)
{
float p = i * two_over_N - 1.;
window[i] = boost::math::cyl_bessel_i(1., alpha_pi * sqrt(1. - p* p)) * inv_denom
* alpha_pi / sqrt(1. - p * p) * (-p) * two_over_N;
}
// lim I1(x)/x as x->0 = 1/2
window[0] = 0.5 * inv_denom * alpha_pi * alpha_pi * two_over_N;
}
bool Amuencha::Model::worker::read_from_cache(std::vector<float>& window, std::vector<float>& window_deriv)
{
auto it = mem_win_cache.find(window.size());
if (it != mem_win_cache.end())
{
window = it->second;
auto itd = mem_winderiv_cache.find(window.size());
if (itd != mem_winderiv_cache.end())
{
window_deriv = itd->second;
return true;
}
// else, load from disk
}
// TODO: make the cache location parametrizable (and an option to not use it)
ifstream file(".amuencha_cache/w" + to_string(window.size()) + ".bin", ios::binary);
if (file.is_open())
{
file.read(reinterpret_cast<char*>(&window[0]), window.size() * sizeof(float));
file.read(reinterpret_cast<char*>(&window_deriv[0]), window_deriv.size() * sizeof(float));
if (file.tellg() != (window.size() + window_deriv.size()) * sizeof(float))
{
cerr << "Error: invalid cache .amuencha_cache/w" + to_string(window.size()) + ".bin \n";
return false;
}
return true;
}
return false;
}
void Amuencha::Model::worker::write_to_cache(std::vector<float>& window, std::vector<float>& window_deriv)
{
#if defined(_WIN32) || defined(_WIN64)
mkdir(".amuencha_cache");
#else
mkdir(".amuencha_cache", 0755);
#endif
ofstream file(".amuencha_cache/w" + to_string(window.size()) + ".bin", ios::binary | ios::trunc);
file.write(reinterpret_cast<char*>(&window[0]), window.size() * sizeof(float));
file.write(reinterpret_cast<char*>(&window_deriv[0]), window_deriv.size() * sizeof(float));
mem_win_cache[window.size()] = window;
mem_winderiv_cache[window.size()] = window_deriv;
}