blob: 18b2a845da32171314c93b68c5407e250bf83301 [file] [log] [blame]
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/reverb_model_estimator.h"
#include <algorithm>
#include <array>
#include <memory>
#include <numeric>
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
// Returns true unless the field trial
// "WebRTC-Aec3SmoothUpdatesTailFreqRespKillSwitch" is enabled, i.e. the smooth
// tail frequency response updates are on by default and the field trial acts
// as a kill switch.
bool EnableSmoothUpdatesTailFreqResp() {
  const bool kill_switch_active = field_trial::IsEnabled(
      "WebRTC-Aec3SmoothUpdatesTailFreqRespKillSwitch");
  return !kill_switch_active;
}
// Returns the tail-to-direct-path energy ratio. Both inputs are treated as
// power spectra of equal length (enforced with RTC_CHECK_EQ); the energies are
// the sums of all bins except the first (DC) one. When the direct path energy
// is not strictly positive, 0 is returned instead of dividing.
float ComputeRatioEnergies(
    const rtc::ArrayView<const float>& freq_resp_direct_path,
    const rtc::ArrayView<const float>& freq_resp_tail) {
  RTC_CHECK_EQ(freq_resp_direct_path.size(), freq_resp_tail.size());

  // Sums a spectrum while leaving out the DC bin.
  const auto energy_without_dc =
      [](const rtc::ArrayView<const float>& spectrum) {
        constexpr size_t kNumSkippedBins = 1;
        return std::accumulate(spectrum.begin() + kNumSkippedBins,
                               spectrum.end(), 0.f);
      };

  const float energy_direct = energy_without_dc(freq_resp_direct_path);
  const float energy_tail = energy_without_dc(freq_resp_tail);
  return energy_direct > 0 ? energy_tail / energy_direct : 0.f;
}
} // namespace
// Builds the estimator from the AEC3 configuration:
// - the main filter length (in blocks) is taken from config.filter.main,
// - the initial reverb decay is the magnitude of
//   config.ep_strength.default_len,
// - the smooth tail-update mode is latched once from the field trial.
// The per-block energies and the tail frequency response start at zero.
ReverbModelEstimator::ReverbModelEstimator(const EchoCanceller3Config& config)
    : filter_main_length_blocks_(config.filter.main.length_blocks),
      reverb_decay_(fabsf(config.ep_strength.default_len)),
      enable_smooth_freq_resp_tail_updates_(EnableSmoothUpdatesTailFreqResp()) {
  std::fill(block_energies_.begin(), block_energies_.end(), 0.f);
  std::fill(freq_resp_tail_.begin(), freq_resp_tail_.end(), 0.f);
}
// Defaulted destructor, defined out of line in this translation unit; no
// custom teardown is performed.
ReverbModelEstimator::~ReverbModelEstimator() = default;
// Decides whether the current linear filter can be used for estimating the
// reverb decay. All of the following must hold:
// - filter_delay_blocks is non-zero and the linear estimate is usable,
// - the delay leaves at least 4 blocks of the main filter after it,
// - length_filter covers the full time-domain length of the main filter.
bool ReverbModelEstimator::IsAGoodFilterForDecayEstimation(
    int filter_delay_blocks,
    bool usable_linear_estimate,
    size_t length_filter) {
  if (filter_delay_blocks == 0 || !usable_linear_estimate) {
    return false;
  }

  const int max_delay_blocks =
      static_cast<int>(filter_main_length_blocks_) - 4;
  if (filter_delay_blocks > max_delay_blocks) {
    return false;
  }

  const size_t required_length =
      static_cast<size_t>(GetTimeDomainLength(filter_main_length_blocks_));
  return length_filter >= required_length;
}
// Updates the reverb model from the latest linear filter.
//
// impulse_response:       time-domain filter coefficients.
// filter_freq_response:   per-block frequency responses of the filter.
// quality_linear:         optional quality measure of the linear filter; when
//                         present it scales the tail update rate (0.2 * value).
// filter_delay_blocks:    index of the block used as the direct path.
// usable_linear_estimate: whether the linear estimate can be relied upon.
// default_decay:          the decay is only estimated when this is negative.
// stationary_block:       when true (and smooth updates are enabled) nothing
//                         is updated.
//
// When the smooth updates are disabled via the kill switch, only the tail
// frequency response is updated, with a fixed rate of 0.1.
void ReverbModelEstimator::Update(
    const std::vector<float>& impulse_response,
    const std::vector<std::array<float, kFftLengthBy2Plus1>>&
        filter_freq_response,
    const absl::optional<float>& quality_linear,
    int filter_delay_blocks,
    bool usable_linear_estimate,
    float default_decay,
    bool stationary_block) {
  if (!enable_smooth_freq_resp_tail_updates_) {
    UpdateFreqRespTail(filter_freq_response, filter_delay_blocks, 0.1f);
    return;
  }

  if (stationary_block) {
    return;
  }

  // The tail response is only refreshed when a filter quality is available.
  float update_rate = 0;
  if (quality_linear) {
    update_rate = 0.2f * quality_linear.value();
    UpdateFreqRespTail(filter_freq_response, filter_delay_blocks, update_rate);
  }

  const bool good_filter = IsAGoodFilterForDecayEstimation(
      filter_delay_blocks, usable_linear_estimate, impulse_response.size());
  if (!good_filter) {
    ResetDecayEstimation();
    return;
  }

  // Keep the largest rate seen since the decay estimation was last completed
  // or reset.
  alpha_ = std::max(update_rate, alpha_);
  const bool estimate_decay = (alpha_ > 0.f) && (default_decay < 0.f);
  if (estimate_decay) {
    // Echo tail decay estimation if default_decay is negative.
    UpdateReverbDecay(impulse_response);
  }
}
// Puts the decay estimation back into its initial state: the regression
// accumulators, the section bookkeeping and the adaptation rate are all
// cleared. reverb_decay_ itself is left untouched.
void ReverbModelEstimator::ResetDecayEstimation() {
  // Adaptation rate.
  alpha_ = 0.f;

  // Section bookkeeping.
  found_end_of_reverb_decay_ = false;
  current_reverb_decay_section_ = 0;
  num_reverb_decay_sections_next_ = 0;
  num_reverb_decay_sections_ = 0;

  // Linear regression accumulators.
  accumulated_count_ = 0.f;
  accumulated_nn_ = 0.f;
  accumulated_nz_ = 0.f;
}
// Advances the incremental reverb decay estimation by one step, based on the
// squared coefficients of |impulse_response|.
//
// The estimation is spread over several calls:
// - While current_reverb_decay_section_ is below the main filter length, each
//   call analyzes one filter section (kFftLengthBy2 coefficients): its energy
//   is compared with the energy stored from the previous pass, the number of
//   consecutive "good" sections is counted, and the section is added to the
//   linear regression of the log squared magnitude.
// - Once all sections have been visited, the regression is solved for the
//   decay, reverb_decay_ is smoothed towards it using alpha_, the tail (noise)
//   energy is refreshed and all accumulators are re-armed for the next pass,
//   which starts 3 blocks after the block containing the peak.
//
// Only the first GetTimeDomainLength(filter_main_length_blocks_) coefficients
// of |impulse_response| are used; missing coefficients are treated as zero.
void ReverbModelEstimator::UpdateReverbDecay(
    const std::vector<float>& impulse_response) {
  constexpr float kOneByFftLengthBy2 = 1.f / kFftLengthBy2;

  // Form the data to match against by squaring the impulse response
  // coefficients.
  std::array<float, GetTimeDomainLength(kMaxAdaptiveFilterLength)>
      matching_data_data;
  RTC_DCHECK_LE(GetTimeDomainLength(filter_main_length_blocks_),
                matching_data_data.size());
  rtc::ArrayView<float> matching_data(
      matching_data_data.data(),
      GetTimeDomainLength(filter_main_length_blocks_));
  // Never write more coefficients than the view holds, and never leave part of
  // the view uninitialized: a longer impulse response is truncated and a
  // shorter one is zero-padded.
  const size_t num_coefficients =
      std::min(impulse_response.size(), matching_data.size());
  std::transform(
      impulse_response.begin(), impulse_response.begin() + num_coefficients,
      matching_data.begin(),
      [](float a) { return a * a; });  // TODO(devicentepena) check if focusing
                                       // on one block would be enough.
  std::fill(matching_data.begin() + num_coefficients, matching_data.end(),
            0.f);

  if (current_reverb_decay_section_ < filter_main_length_blocks_) {
    // Update accumulated variables for the current filter section.
    const size_t start_index = current_reverb_decay_section_ * kFftLengthBy2;

    RTC_DCHECK_GT(matching_data.size(), start_index);
    RTC_DCHECK_GE(matching_data.size(), start_index + kFftLengthBy2);
    float section_energy =
        std::accumulate(matching_data.begin() + start_index,
                        matching_data.begin() + start_index + kFftLengthBy2,
                        0.f) *
        kOneByFftLengthBy2;

    section_energy = std::max(
        section_energy, 1e-32f);  // Regularization to avoid division by 0.

    RTC_DCHECK_LT(current_reverb_decay_section_, block_energies_.size());
    const float energy_ratio =
        block_energies_[current_reverb_decay_section_] / section_energy;

    found_end_of_reverb_decay_ = found_end_of_reverb_decay_ ||
                                 (energy_ratio > 1.1f || energy_ratio < 0.9f);

    // Count consecutive number of "good" filter sections, where "good" means:
    // 1) energy is above noise floor.
    // 2) energy of current section has not changed too much from last check.
    if (!found_end_of_reverb_decay_ && section_energy > tail_energy_) {
      ++num_reverb_decay_sections_next_;
    } else {
      found_end_of_reverb_decay_ = true;
    }

    block_energies_[current_reverb_decay_section_] = section_energy;

    if (num_reverb_decay_sections_ > 0) {
      // Linear regression of log squared magnitude of impulse response.
      for (size_t i = 0; i < kFftLengthBy2; i++) {
        RTC_DCHECK_GT(matching_data.size(), start_index + i);
        float z = FastApproxLog2f(matching_data[start_index + i] + 1e-10);
        accumulated_nz_ += accumulated_count_ * z;
        ++accumulated_count_;
      }
    }

    num_reverb_decay_sections_ =
        num_reverb_decay_sections_ > 0 ? num_reverb_decay_sections_ - 1 : 0;
    ++current_reverb_decay_section_;
  } else {
    constexpr float kMaxDecay = 0.95f;  // ~1 sec min RT60.
    constexpr float kMinDecay = 0.02f;  // ~15 ms max RT60.

    // Accumulated variables throughout whole filter.

    // Solve for decay rate.
    float decay = reverb_decay_;
    if (accumulated_nn_ != 0.f) {
      const float exp_candidate = -accumulated_nz_ / accumulated_nn_;
      decay = powf(2.0f, -exp_candidate * kFftLengthBy2);
      decay = std::min(decay, kMaxDecay);
      decay = std::max(decay, kMinDecay);
    }

    // Filter tail energy (assumed to be noise).
    constexpr size_t kTailLength = kFftLengthBy2;
    constexpr float k1ByTailLength = 1.f / kTailLength;
    const size_t tail_index =
        GetTimeDomainLength(filter_main_length_blocks_) - kTailLength;

    RTC_DCHECK_GT(matching_data.size(), tail_index);
    tail_energy_ = std::accumulate(matching_data.begin() + tail_index,
                                   matching_data.end(), 0.f) *
                   k1ByTailLength;

    // Update length of decay.
    num_reverb_decay_sections_ = num_reverb_decay_sections_next_;
    num_reverb_decay_sections_next_ = 0;
    // Must have enough data (number of sections) in order
    // to estimate decay rate.
    if (num_reverb_decay_sections_ < 5) {
      num_reverb_decay_sections_ = 0;
    }

    const float N = num_reverb_decay_sections_ * kFftLengthBy2;
    accumulated_nz_ = 0.f;
    const float k1By12 = 1.f / 12.f;
    // Arithmetic sum $2 \sum_{i=0.5}^{(N-1)/2}i^2$ calculated directly.
    accumulated_nn_ = N * (N * N - 1.0f) * k1By12;
    accumulated_count_ = -N * 0.5f;
    // Linear regression approach assumes symmetric index around 0.
    accumulated_count_ += 0.5f;

    // Identify the peak index of the impulse response.
    const size_t peak_index = std::distance(
        matching_data.begin(),
        std::max_element(matching_data.begin(), matching_data.end()));

    current_reverb_decay_section_ = peak_index * kOneByFftLengthBy2 + 3;
    // Make sure we're not out of bounds.
    if (current_reverb_decay_section_ + 1 >= filter_main_length_blocks_) {
      current_reverb_decay_section_ = filter_main_length_blocks_;
    }
    const size_t start_index = current_reverb_decay_section_ * kFftLengthBy2;
    // When the start section was clamped to the filter length above, there is
    // no section left inside the analyzed data. Reading it would access memory
    // past the end of matching_data, so the energy is then taken as zero,
    // which makes both checks below fail.
    float first_section_energy = 0.f;
    if (start_index + kFftLengthBy2 <= matching_data.size()) {
      first_section_energy =
          std::accumulate(matching_data.begin() + start_index,
                          matching_data.begin() + start_index + kFftLengthBy2,
                          0.f) *
          kOneByFftLengthBy2;
    }

    // To estimate the reverb decay, the energy of the first filter section
    // must be substantially larger than the last.
    // Also, the first filter section energy must not deviate too much
    // from the max peak.
    bool main_filter_has_reverb = first_section_energy > 4.f * tail_energy_;
    bool main_filter_is_sane = first_section_energy > 2.f * tail_energy_ &&
                               matching_data[peak_index] < 100.f;

    // Not detecting any decay, but tail is over noise - assume max decay.
    if (num_reverb_decay_sections_ == 0 && main_filter_is_sane &&
        main_filter_has_reverb) {
      decay = kMaxDecay;
    }

    if (main_filter_is_sane && num_reverb_decay_sections_ > 0) {
      // Limit how fast the decay can drop in a single update before smoothing
      // reverb_decay_ towards it.
      decay = std::max(.97f * reverb_decay_, decay);
      reverb_decay_ -= alpha_ * (reverb_decay_ - decay);
    }

    found_end_of_reverb_decay_ =
        !(main_filter_is_sane && main_filter_has_reverb);
    alpha_ = 0.f;  // Stop estimation of the decay until another good filter is
                   // received
  }
}
// Updates the estimation of the frequency response at the filter tail.
void ReverbModelEstimator::UpdateFreqRespTail(
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
filter_freq_response,
int filter_delay_blocks,
float alpha) {
size_t num_blocks = filter_freq_response.size();
rtc::ArrayView<const float> freq_resp_tail(
filter_freq_response[num_blocks - 1]);
rtc::ArrayView<const float> freq_resp_direct_path(
filter_freq_response[filter_delay_blocks]);
float ratio_energies =
ComputeRatioEnergies(freq_resp_direct_path, freq_resp_tail);
ratio_tail_to_direct_path_ +=
alpha * (ratio_energies - ratio_tail_to_direct_path_);
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
freq_resp_tail_[k] = freq_resp_direct_path[k] * ratio_tail_to_direct_path_;
}
for (size_t k = 1; k < kFftLengthBy2; ++k) {
float avg_neighbour =
0.5f * (freq_resp_tail_[k - 1] + freq_resp_tail_[k + 1]);
freq_resp_tail_[k] = std::max(freq_resp_tail_[k], avg_neighbour);
}
}
void ReverbModelEstimator::Dump(
const std::unique_ptr<ApmDataDumper>& data_dumper) {
data_dumper->DumpRaw("aec3_reverb_decay", reverb_decay_);
data_dumper->DumpRaw("aec3_reverb_tail_energy", tail_energy_);
data_dumper->DumpRaw("aec3_reverb_alpha", alpha_);
data_dumper->DumpRaw("aec3_num_reverb_decay_sections",
static_cast<int>(num_reverb_decay_sections_));
}
} // namespace webrtc