diff --git a/CMakeLists.txt b/CMakeLists.txt index 63a159d..ea41706 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,6 @@ list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) find_package(Threads REQUIRED) find_package(Qt5 COMPONENTS Core Widgets Multimedia REQUIRED) -find_package(WebRTC REQUIRED) if (MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267") diff --git a/src/AudioProcessor.cpp b/src/AudioProcessor.cpp index f13dcaf..04cbd7e 100644 --- a/src/AudioProcessor.cpp +++ b/src/AudioProcessor.cpp @@ -70,8 +70,10 @@ qint64 AudioProcessor::readData(char* data, qint64 maxlen) qint64 AudioProcessor::writeData(const char* data, qint64 len) { - std::unique_lock lock(inputMutex_); + std::unique_lock lock1(inputMutex_); + std::unique_lock lock2(inputEventMutex_); inputBuffer_.append(data, len); + inputEvent_.notify_all(); return len; } @@ -79,7 +81,7 @@ void AudioProcessor::process() { while (doWork_) { - auto waitUntil = std::chrono::high_resolution_clock::now() + std::chrono::milliseconds(5); + std::unique_lock processLock(processMutex_); const std::size_t bytesToRead = bufferSize_ * format_.sampleSize() / 8 * format_.channelCount(); @@ -119,13 +121,15 @@ void AudioProcessor::process() { std::unique_lock lock(outputMutex_); outputBuffer_.append(buf.constData(), buf.byteCount()); + emit readyRead(); } - - - emit readyRead(); } - - std::this_thread::sleep_until(waitUntil); + else + { + processLock.unlock(); + std::unique_lock lock(inputEventMutex_); + inputEvent_.wait(lock); + } } } @@ -144,6 +148,22 @@ void AudioProcessor::processBuffer(QAudioBuffer& inputBuffer, const QAudioBuffer emit outputLevelsChanged(outputLevels); } +void AudioProcessor::clearBuffers() +{ + { + std::unique_lock lock(inputMutex_); + inputBuffer_.clear(); + } + { + std::unique_lock lock(outputMutex_); + outputBuffer_.clear(); + } + { + std::unique_lock lock(monitorMutex_); + monitorBuffer_.clear(); + } +} + bool AudioProcessor::isSequential() const { return true; @@ -162,13 +182,8 @@ qint64 AudioProcessor::bytesAvailable() const bool AudioProcessor::open(QIODevice::OpenMode mode) { - std::unique_lock lock1(inputMutex_); - std::unique_lock lock2(outputMutex_); - std::unique_lock lock3(monitorMutex_); - - inputBuffer_.clear(); - outputBuffer_.clear(); - monitorBuffer_.clear(); + std::unique_lock lock(processMutex_); + clearBuffers(); sourceEncoder_.reset(new WavFileWriter("source.wav", format_)); sourceEncoder_->open(); @@ -204,13 +219,8 @@ Backend AudioProcessor::getCurrentBackend() const void AudioProcessor::switchBackend(Backend backend) { - std::unique_lock lock1(inputMutex_); - std::unique_lock lock2(outputMutex_); - std::unique_lock lock3(monitorMutex_); - - inputBuffer_.clear(); - outputBuffer_.clear(); - monitorBuffer_.clear(); + std::unique_lock lock(processMutex_); + clearBuffers(); if (backend == Backend::Speex) dsp_.reset(new SpeexDSP(format_, monitorFormat_)); diff --git a/src/AudioProcessor.h b/src/AudioProcessor.h index be63783..afc176e 100644 --- a/src/AudioProcessor.h +++ b/src/AudioProcessor.h @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -57,11 +58,14 @@ protected: private: void process(); void processBuffer(QAudioBuffer& inputBuffer, const QAudioBuffer& monitorBuffer); + void clearBuffers(); mutable std::mutex inputMutex_; mutable std::mutex outputMutex_; mutable std::mutex monitorMutex_; + std::mutex processMutex_; + std::size_t bufferSize_; const QAudioFormat format_; const QAudioFormat monitorFormat_; @@ -76,6 +80,9 @@ private: std::thread worker_; bool doWork_ = false; + std::condition_variable inputEvent_; + std::mutex inputEventMutex_; + QScopedPointer sourceEncoder_; QScopedPointer processedEncoder_; }; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f6d943f..c4ae605 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,7 @@ +find_package(LibWebRTC REQUIRED) +include(${LIBWEBRTC_USE_FILE}) + set(TARGET_NAME speex_webrtc_test) set(CMAKE_AUTOMOC ON) @@ -17,7 +20,7 @@ file(GLOB_RECURSE RESOURCES *.qrc) target_link_libraries(${TARGET_NAME} speexdsp - WebRTC + ${LIBWEBRTC_LIBRARIES} Qt5::Core Qt5::Widgets Qt5::Multimedia diff --git a/src/MainWindow.cpp b/src/MainWindow.cpp index 1043388..f0d68d5 100644 --- a/src/MainWindow.cpp +++ b/src/MainWindow.cpp @@ -44,6 +44,9 @@ MainWindow::MainWindow() : ui(new Ui::MainWindow) { ui->setupUi(this); + audioInputThread_.start(); + audioOutputThread_.start(); + qDebug(Gui) << "Enumerating audio devices..."; for (auto& deviceInfo : QAudioDeviceInfo::availableDevices(QAudio::AudioInput)) { @@ -83,6 +86,8 @@ MainWindow::MainWindow() : ui(new Ui::MainWindow) MainWindow::~MainWindow() { stopRecording(); + audioInputThread_.exit(0); + audioOutputThread_.exit(0); } void fixFormatForDevice(QAudioFormat& format, const QAudioDeviceInfo& info) @@ -114,6 +119,10 @@ void MainWindow::initializeAudio(const QAudioDeviceInfo& inputDeviceInfo, audioOutput_.reset(new QAudioOutput(outputDeviceInfo, outputFormat)); monitorInput_.reset(new QAudioInput(monitorDeviceInfo, monitorFormat)); + audioInput_->moveToThread(&audioInputThread_); + audioOutput_->moveToThread(&audioOutputThread_); + monitorInput_->moveToThread(&audioInputThread_); + processor_.reset(new AudioProcessor(captureFormat, monitorFormat, monitorBuffer_)); connect(processor_.get(), &AudioProcessor::voiceActivityChanged, this, &MainWindow::updateVoiceActivity); @@ -132,6 +141,9 @@ void MainWindow::startRecording() audioInput_->start(processor_.get()); audioOutput_->start(processor_.get()); monitorInput_->start(&monitorBuffer_); + + qInfo(Gui) << "input buffer size:" << audioInput_->bufferSize(); + qInfo(Gui) << "output buffer size:" << audioOutput_->bufferSize(); } void MainWindow::stopRecording() @@ -191,11 +203,21 @@ QString levelFromCode(int value) void MainWindow::setupDials(Backend backend) { ui->noiseGroupBox->setChecked(false); - ui->agcGroupBox->setChecked(false); - ui->aecGroupBox->setChecked(false); ui->noiseSuppressionDial->setValue(0); + + ui->agcGroupBox->setChecked(false); ui->agcLevelDial->setValue(0); ui->agcLevelValue->setText("0 dBFS"); + ui->agcMaxGainDial->setValue(0); + ui->agcMaxGainValue->setText("0 dB"); + ui->agcMaxIncrementDial->setValue(0); + ui->agcMaxIncrementValue->setText("0 dB/sec"); + ui->agcMaxDecrementDial->setValue(0); + ui->agcMaxDecrementValue->setText("0 dB/sec"); + + ui->aecGroupBox->setChecked(false); + ui->aecSuppressionDial->setValue(0); + updateVoiceActivity(false); if (backend == Backend::Speex) @@ -234,15 +256,18 @@ void MainWindow::setupDials(Backend backend) void MainWindow::changeNoiseReductionSettings() { - processor_->setEffectParam("noise_reduction_enabled", ui->noiseGroupBox->isChecked()); if (currentBackend() == Backend::Speex) { + std::int32_t enabled = ui->noiseGroupBox->isChecked() ? 1 : 0; + processor_->setEffectParam("noise_reduction_enabled", enabled); + std::int32_t maxAttenuation = -ui->noiseSuppressionDial->value(); ui->noiseSuppressionValue->setText(QString("%1 dB").arg(-maxAttenuation)); processor_->setEffectParam("noise_reduction_max_attenuation", maxAttenuation); } else { + processor_->setEffectParam("noise_reduction_enabled", ui->noiseGroupBox->isChecked()); int suppressionLevel = ui->noiseSuppressionDial->value(); ui->noiseSuppressionValue->setText(levelFromCode(suppressionLevel)); processor_->setEffectParam("noise_reduction_suppression_level", suppressionLevel); @@ -251,10 +276,11 @@ void MainWindow::changeNoiseReductionSettings() void MainWindow::changeAGCSettings() { - processor_->setEffectParam("gain_control_enabled", ui->agcGroupBox->isChecked()); - if (currentBackend() == Backend::Speex) { + std::int32_t enabled = ui->agcGroupBox->isChecked() ? 1 : 0; + processor_->setEffectParam("gain_control_enabled", enabled); + std::int32_t level = QAudio::convertVolume(-ui->agcLevelDial->value(), QAudio::DecibelVolumeScale, QAudio::LinearVolumeScale) * @@ -276,6 +302,8 @@ void MainWindow::changeAGCSettings() } else { + processor_->setEffectParam("gain_control_enabled", ui->agcGroupBox->isChecked()); + int level = ui->agcLevelDial->value(); ui->agcLevelValue->setText(QString("%1 dBFS").arg(-ui->agcLevelDial->value())); processor_->setEffectParam("gain_control_target_level", level); @@ -288,9 +316,11 @@ void MainWindow::changeAGCSettings() void MainWindow::changeAECSettings() { - processor_->setEffectParam("echo_cancellation_enabled", ui->aecGroupBox->isChecked()); if (currentBackend() == Backend::Speex) { + std::int32_t enabled = ui->aecGroupBox->isChecked() ? 1 : 0; + processor_->setEffectParam("echo_cancellation_enabled", enabled); + std::int32_t maxAttenuation = -ui->aecSuppressionDial->value(); ui->aecSuppressionValue->setText(QString("%1 dB").arg(-maxAttenuation)); @@ -298,6 +328,8 @@ void MainWindow::changeAECSettings() } else { + processor_->setEffectParam("echo_cancellation_enabled", ui->aecGroupBox->isChecked()); + int suppressionLevel = ui->aecSuppressionDial->value(); ui->aecSuppressionValue->setText(levelFromCode(suppressionLevel)); diff --git a/src/MainWindow.h b/src/MainWindow.h index 4fdd0de..9966c09 100644 --- a/src/MainWindow.h +++ b/src/MainWindow.h @@ -7,6 +7,7 @@ #include #include #include +#include QT_BEGIN_NAMESPACE namespace Ui { @@ -51,6 +52,9 @@ private: Ui::MainWindow* ui = nullptr; + QThread audioInputThread_; + QThread audioOutputThread_; + QScopedPointer audioInput_; QScopedPointer monitorInput_; QScopedPointer audioOutput_; diff --git a/src/WebRTCDSP.cpp b/src/WebRTCDSP.cpp index c9ae6ab..4eafb2f 100644 --- a/src/WebRTCDSP.cpp +++ b/src/WebRTCDSP.cpp @@ -2,8 +2,8 @@ #include "Timer.h" +#include #include -#include #include @@ -11,6 +11,8 @@ namespace SpeexWebRTCTest { namespace { +using NoiseSuppressionLevel = webrtc::AudioProcessing::Config::NoiseSuppression::Level; + Q_LOGGING_CATEGORY(WebRTC, "webrtc") void convert(const QAudioBuffer& from, webrtc::AudioFrame& to) @@ -18,8 +20,7 @@ void convert(const QAudioBuffer& from, webrtc::AudioFrame& to) to.num_channels_ = from.format().channelCount(); to.sample_rate_hz_ = from.format().sampleRate(); to.samples_per_channel_ = from.frameCount(); - to.interleaved_ = (from.format().channelCount() > 1); - memcpy(to.data_, from.constData(), from.byteCount()); + memcpy(to.mutable_data(), from.constData(), from.byteCount()); } void convert(const webrtc::AudioFrame& from, QAudioBuffer& to) @@ -32,7 +33,7 @@ void convert(const webrtc::AudioFrame& from, QAudioBuffer& to) format.setByteOrder(QAudioFormat::LittleEndian); format.setSampleType(QAudioFormat::SignedInt); - QByteArray data(reinterpret_cast(from.data_), + QByteArray data(reinterpret_cast(from.data()), from.samples_per_channel_ * from.num_channels_ * sizeof(std::int16_t)); to = QAudioBuffer(data, format); } @@ -42,33 +43,29 @@ void convert(const webrtc::AudioFrame& from, QAudioBuffer& to) WebRTCDSP::WebRTCDSP(const QAudioFormat& mainFormat, const QAudioFormat& auxFormat) : AudioEffect(mainFormat, auxFormat) { - apm_ = webrtc::AudioProcessing::Create(); + apm_ = webrtc::AudioProcessingBuilder().Create(); + if (!apm_) throw std::runtime_error("failed to create webrtc::AudioProcessing instance"); - webrtc::Config config; - config.Set(new webrtc::DelayAgnostic(true)); - config.Set(new webrtc::ExtendedFilter(true)); - apm_->SetExtraOptions(config); + webrtc::AudioProcessing::Config config; - apm_->voice_detection()->Enable(true); - apm_->voice_detection()->set_frame_size_ms(300); - apm_->voice_detection()->set_likelihood(webrtc::VoiceDetection::kModerateLikelihood); + config.voice_detection.enabled = true; - apm_->noise_suppression()->Enable(false); - apm_->noise_suppression()->set_level(webrtc::NoiseSuppression::kLow); + config.noise_suppression.enabled = false; + config.noise_suppression.level = NoiseSuppressionLevel::kLow; - apm_->echo_cancellation()->Enable(false); - apm_->echo_cancellation()->set_suppression_level(webrtc::EchoCancellation::kLowSuppression); - apm_->set_stream_delay_ms(100); + config.echo_canceller.enabled = false; + config.echo_canceller.mobile_mode = false; + config.residual_echo_detector.enabled = false; - apm_->gain_control()->Enable(false); - apm_->gain_control()->set_mode(webrtc::GainControl::kAdaptiveDigital); - apm_->gain_control()->enable_limiter(true); - apm_->gain_control()->set_compression_gain_db(0); - apm_->gain_control()->set_target_level_dbfs(0); + config.gain_controller1.enabled = false; + config.gain_controller1.mode = webrtc::AudioProcessing::Config::GainController1::kAdaptiveDigital; + config.gain_controller1.enable_limiter = true; + config.gain_controller1.compression_gain_db = 0; + config.gain_controller1.target_level_dbfs = 0; - // apm_->Initialize(); + apm_->ApplyConfig(config); } WebRTCDSP::~WebRTCDSP() @@ -136,7 +133,7 @@ void WebRTCDSP::processFrame(QAudioBuffer& mainBuffer, const QAudioBuffer& auxBu int error; - if (apm_->echo_cancellation()->is_enabled()) + if (apm_->GetConfig().echo_canceller.enabled) { error = apm_->ProcessReverseStream(&auxFrame); if (error != 0) @@ -157,30 +154,33 @@ void WebRTCDSP::processFrame(QAudioBuffer& mainBuffer, const QAudioBuffer& auxBu convert(mainFrame, mainBuffer); - setVoiceActive(apm_->voice_detection()->stream_has_voice()); + setVoiceActive(*apm_->GetStatistics().voice_detected); } void WebRTCDSP::setParameter(const QString& param, QVariant value) { + auto config = apm_->GetConfig(); if (param == "noise_reduction_enabled") - apm_->noise_suppression()->Enable(value.toBool()); + config.noise_suppression.enabled = value.toBool(); else if (param == "noise_reduction_suppression_level") - apm_->noise_suppression()->set_level(static_cast( - webrtc::NoiseSuppression::kLow + value.toUInt())); + config.noise_suppression.level = + static_cast(NoiseSuppressionLevel::kLow + value.toUInt()); else if (param == "echo_cancellation_enabled") - apm_->echo_cancellation()->Enable(value.toBool()); + { + config.echo_canceller.enabled = value.toBool(); + config.residual_echo_detector.enabled = value.toBool(); + } else if (param == "echo_cancellation_suppression_level") - apm_->echo_cancellation()->set_suppression_level( - static_cast( - webrtc::EchoCancellation::kLowSuppression + value.toUInt())); + return; // TODO ??? else if (param == "gain_control_enabled") - apm_->gain_control()->Enable(value.toBool()); + config.gain_controller1.enabled = value.toBool(); else if (param == "gain_control_target_level") - apm_->gain_control()->set_target_level_dbfs(value.toInt()); + config.gain_controller1.target_level_dbfs = value.toInt(); else if (param == "gain_control_max_gain") - apm_->gain_control()->set_compression_gain_db(value.toInt()); + config.gain_controller1.compression_gain_db = value.toInt(); else throw std::invalid_argument("Invalid param"); + apm_->ApplyConfig(config); } unsigned int WebRTCDSP::requiredFrameSizeMs() const