webrtc beamforming 編譯過程記錄

本文主要介紹webrtc中的波束模塊的編譯過程,關於波束算法的技術原理將會在下篇文章中介紹。
webrtc是一個極其龐大的項目,裏面的文件包含特別複雜。正是因爲如此,對還是小白的我造成了極大的困難。剛開始打算採取的方法是將波束模塊裏的nonlinear_beamformer_test.cc當做主文件,然後編譯這個,差啥文件就往vs工程裏面補,這樣搞了一段時間後,便發現這種方式實在是太過愚蠢。因爲有可能爲了編譯一個波束模塊,把整個webrtc的代碼都弄了過來。後來接觸到了cmake,才發現了這個工具的神奇。在webrtc\modules\audio_processing\beamformer目錄下找到了波束模塊的關鍵代碼,其中nonlinear_beamformer_test.cc就是波束模塊的測試代碼。裏面的代碼如下
beamformer相關目錄

/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <stdio.h>

#include <vector>

#include "gflags/gflags.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/format_macros.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/common_audio/wav_file.h"
#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
#include "webrtc/modules/audio_processing/test/test_utils.h"

// Command-line flags: input WAV (-i), output WAV (-o) and the microphone
// array geometry (-mic_positions).
DEFINE_string(i, "", "The name of the input file to read from.");
DEFINE_string(o, "out.wav", "Name of the output file to write to.");
DEFINE_string(mic_positions, "",
    "Space delimited cartesian coordinates of microphones in meters. "
    "The coordinates of each point are contiguous. "
    "For a two element array: \"x1 y1 z1 x2 y2 z2\"");

namespace webrtc {
namespace {

// The audio is processed in chunks of 1000 / 100 = 10 ms.
const int kChunksPerSecond = 100;
const int kChunkSizeMs = 1000 / kChunksPerSecond;

const char kUsage[] =
    "Command-line tool to run beamforming on WAV files. The signal is passed\n"
    "in as a single band, unlike the audio processing interface which splits\n"
    "signals into multiple bands.";

}  // namespace

// Reads the multi-channel WAV file named by -i chunk by chunk, runs every
// chunk through NonlinearBeamformer and writes the single-channel result to
// the WAV file named by -o. Always returns 0; a mismatch between the number
// of parsed microphone positions and the number of input channels is caught
// by RTC_CHECK_EQ.
int main(int argc, char* argv[]) {
  google::SetUsageMessage(kUsage);
  google::ParseCommandLineFlags(&argc, &argv, true);

  WavReader in_file(FLAGS_i);
  // The output file is opened with one channel at the input sample rate.
  WavWriter out_file(FLAGS_o, in_file.sample_rate(), 1);

  const size_t num_mics = in_file.num_channels();
  const std::vector<Point> array_geometry =
      ParseArrayGeometry(FLAGS_mic_positions, num_mics);
  RTC_CHECK_EQ(array_geometry.size(), num_mics);

  NonlinearBeamformer bf(array_geometry);
  bf.Initialize(kChunkSizeMs, in_file.sample_rate());

  printf("Input file: %s\nChannels: %" PRIuS ", Sample rate: %d Hz\n\n",
         FLAGS_i.c_str(), in_file.num_channels(), in_file.sample_rate());
  printf("Output file: %s\nChannels: %" PRIuS ", Sample rate: %d Hz\n\n",
         FLAGS_o.c_str(), out_file.num_channels(), out_file.sample_rate());

  ChannelBuffer<float> in_buf(
      rtc::CheckedDivExact(in_file.sample_rate(), kChunksPerSecond),
      in_file.num_channels());
  ChannelBuffer<float> out_buf(
      rtc::CheckedDivExact(out_file.sample_rate(), kChunksPerSecond),
      out_file.num_channels());

  // Scratch buffer sized for one interleaved input chunk; it is reused for
  // the (smaller) interleaved output chunk. A trailing partial chunk ends
  // the loop and is not processed.
  std::vector<float> interleaved(in_buf.size());
  while (in_file.ReadSamples(interleaved.size(),
                             &interleaved[0]) == interleaved.size()) {
    FloatS16ToFloat(&interleaved[0], interleaved.size(), &interleaved[0]);
    Deinterleave(&interleaved[0], in_buf.num_frames(),
                 in_buf.num_channels(), in_buf.channels());

    bf.ProcessChunk(in_buf, &out_buf);

    Interleave(out_buf.channels(), out_buf.num_frames(),
               out_buf.num_channels(), &interleaved[0]);
    FloatToFloatS16(&interleaved[0], interleaved.size(), &interleaved[0]);
    // Slightly modified from the upstream test: write only the samples that
    // belong to the output chunk. Writing interleaved.size() samples (one
    // per input channel per frame) makes the output audio come out
    // lengthened and wrong. Since the output shares the input sample rate,
    // this count equals the previous interleaved.size() / in_buf.num_channels().
    const size_t num_output_samples =
        out_buf.num_frames() * out_buf.num_channels();
    out_file.WriteSamples(&interleaved[0], num_output_samples);
  }

  return 0;
}

}  // namespace webrtc

int main(int argc, char* argv[]) {
  return webrtc::main(argc, argv);
}

根據這個測試文件,大概所需要的關鍵部分有:解析控制檯輸入的gflags;讀寫wav文件的WavReader,WavWriter這兩個類都在\webrtc\common_audio目錄下的wav_file.h中;以及波束類NonlinearBeamformer,它的相關參數定義及實現在nonlinear_beamformer.cc以及nonlinear_beamformer.h中,其實最核心的代碼就是:

bf.ProcessChunk(in_buf, &out_buf)

要想弄明白webrtc的波束算法原理,那麼就得弄明白該函數的執行流程以及實現原理。具體的實現流程將在下次介紹。
接下來就開始編譯波束模塊代碼了。首先給出CMakeLists.txt文件。這裏面的目錄結構是我的機器上的,若在你的機器上編譯,可能需要改變相應的目錄結構。

# Builds webrtc-bf, a standalone executable made of
# nonlinear_beamformer_test.cc plus the WebRTC sources listed below.
# The gflags/ and webrtc/ paths are relative to this file; adjust them to
# match the layout on your machine.
cmake_minimum_required(VERSION 2.8)
project(wav-beamforming)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)

add_subdirectory(gflags)

# One argument per definition so each is registered as its own flag.
add_definitions(-DWEBRTC_LINUX -DWEBRTC_POSIX -DWEBRTC_NS_FLOAT)
#-DWEBRTC_UNTRUSTED_DELAY

include_directories(
    webrtc
    webrtc/webrtc/common_audio/signal_processing/include
    webrtc/webrtc/modules/audio_coding/codecs/isac/main/include
    webrtc/webrtc/modules/audio_processing/test/
)

# Source files, relative to webrtc/webrtc; the prefix is added further down
# by prepend_path().
set(WEBRTC_SRC_
    base/buffer.cc
    base/checks.cc
    base/criticalsection.cc
    base/event.cc
    base/event_tracer.cc
    base/logging.cc
    base/platform_file.cc
    base/platform_thread.cc
    base/stringencode.cc
    base/thread_checker_impl.cc
    base/timeutils.cc
    common_audio/audio_converter.cc
    common_audio/audio_ring_buffer.cc
    common_audio/audio_util.cc
    common_audio/blocker.cc
    common_audio/channel_buffer.cc
    common_audio/fft4g.c
    common_audio/fir_filter.cc
    common_audio/fir_filter_sse.cc
    common_audio/lapped_transform.cc
    common_audio/real_fourier.cc
    common_audio/real_fourier_ooura.cc
    common_audio/resampler/push_resampler.cc
    common_audio/resampler/push_sinc_resampler.cc
    common_audio/resampler/resampler.cc
    common_audio/resampler/sinc_resampler.cc
    common_audio/resampler/sinc_resampler_sse.cc
    common_audio/resampler/sinusoidal_linear_chirp_source.cc
    common_audio/ring_buffer.c
    common_audio/signal_processing/auto_correlation.c
    common_audio/signal_processing/auto_corr_to_refl_coef.c
    common_audio/signal_processing/complex_bit_reverse.c
    common_audio/signal_processing/complex_fft.c
    common_audio/signal_processing/copy_set_operations.c
    common_audio/signal_processing/cross_correlation.c
    common_audio/signal_processing/division_operations.c
    common_audio/signal_processing/dot_product_with_scale.c
    common_audio/signal_processing/downsample_fast.c
    common_audio/signal_processing/energy.c
    common_audio/signal_processing/filter_ar.c
    common_audio/signal_processing/filter_ar_fast_q12.c
    common_audio/signal_processing/filter_ma_fast_q12.c
    common_audio/signal_processing/get_hanning_window.c
    common_audio/signal_processing/get_scaling_square.c
    common_audio/signal_processing/ilbc_specific_functions.c
    common_audio/signal_processing/levinson_durbin.c
    common_audio/signal_processing/lpc_to_refl_coef.c
    common_audio/signal_processing/min_max_operations.c
    common_audio/signal_processing/randomization_functions.c
    common_audio/signal_processing/real_fft.c
    common_audio/signal_processing/refl_coef_to_lpc.c
    common_audio/signal_processing/resample_48khz.c
    common_audio/signal_processing/resample_by_2.c
    common_audio/signal_processing/resample_by_2_internal.c
    common_audio/signal_processing/resample.c
    common_audio/signal_processing/resample_fractional.c
    common_audio/signal_processing/spl_init.c
    common_audio/signal_processing/splitting_filter.c
    common_audio/signal_processing/spl_sqrt.c
    common_audio/signal_processing/spl_sqrt_floor.c
    common_audio/signal_processing/sqrt_of_one_minus_x_squared.c
    common_audio/signal_processing/vector_scaling_operations.c
    common_audio/sparse_fir_filter.cc
    common_audio/vad/vad_core.c
    common_audio/vad/vad_filterbank.c
    common_audio/vad/vad_gmm.c
    common_audio/vad/vad_sp.c
    common_audio/vad/webrtc_vad.c
    common_audio/wav_file.cc
    common_audio/wav_header.cc
    common_audio/window_generator.cc
    common_types.cc
    modules/audio_coding/codecs/audio_decoder.cc
    modules/audio_coding/codecs/audio_encoder.cc
    modules/audio_coding/codecs/isac/locked_bandwidth_info.cc
    modules/audio_coding/codecs/isac/main/source/arith_routines.c
    modules/audio_coding/codecs/isac/main/source/arith_routines_hist.c
    modules/audio_coding/codecs/isac/main/source/arith_routines_logist.c
    modules/audio_coding/codecs/isac/main/source/audio_decoder_isac.cc
    modules/audio_coding/codecs/isac/main/source/audio_encoder_isac.cc
    modules/audio_coding/codecs/isac/main/source/bandwidth_estimator.c
    modules/audio_coding/codecs/isac/main/source/crc.c
    modules/audio_coding/codecs/isac/main/source/decode_bwe.c
    modules/audio_coding/codecs/isac/main/source/decode.c
    modules/audio_coding/codecs/isac/main/source/encode.c
    modules/audio_coding/codecs/isac/main/source/encode_lpc_swb.c
    modules/audio_coding/codecs/isac/main/source/entropy_coding.c
    modules/audio_coding/codecs/isac/main/source/fft.c
    modules/audio_coding/codecs/isac/main/source/filterbanks.c
    modules/audio_coding/codecs/isac/main/source/filterbank_tables.c
    modules/audio_coding/codecs/isac/main/source/filter_functions.c
    modules/audio_coding/codecs/isac/main/source/intialize.c
    modules/audio_coding/codecs/isac/main/source/isac.c
    modules/audio_coding/codecs/isac/main/source/lattice.c
    modules/audio_coding/codecs/isac/main/source/lpc_analysis.c
    modules/audio_coding/codecs/isac/main/source/lpc_gain_swb_tables.c
    modules/audio_coding/codecs/isac/main/source/lpc_shape_swb12_tables.c
    modules/audio_coding/codecs/isac/main/source/lpc_shape_swb16_tables.c
    modules/audio_coding/codecs/isac/main/source/lpc_tables.c
    modules/audio_coding/codecs/isac/main/source/pitch_estimator.c
    modules/audio_coding/codecs/isac/main/source/pitch_filter.c
    modules/audio_coding/codecs/isac/main/source/pitch_gain_tables.c
    modules/audio_coding/codecs/isac/main/source/pitch_lag_tables.c
    modules/audio_coding/codecs/isac/main/source/spectrum_ar_model_tables.c
    modules/audio_coding/codecs/isac/main/source/transform.c
    modules/audio_processing/aec/aec_core.cc
    modules/audio_processing/aec/aec_core_sse2.cc
    modules/audio_processing/aec/aec_rdft.cc
    modules/audio_processing/aec/aec_rdft_sse2.cc
    modules/audio_processing/aec/aec_resampler.cc
    modules/audio_processing/aec/echo_cancellation.cc
    modules/audio_processing/aecm/aecm_core.cc
    modules/audio_processing/aecm/aecm_core_c.cc
    modules/audio_processing/aecm/echo_control_mobile.cc
    modules/audio_processing/agc/agc.cc
    modules/audio_processing/agc/agc_manager_direct.cc
    modules/audio_processing/agc/histogram.cc
    modules/audio_processing/agc/legacy/analog_agc.c
    modules/audio_processing/agc/legacy/digital_agc.c
    modules/audio_processing/agc/utility.cc
    modules/audio_processing/audio_buffer.cc
    modules/audio_processing/audio_processing_impl.cc
    modules/audio_processing/beamformer/array_util.cc
    modules/audio_processing/beamformer/covariance_matrix_generator.cc
    modules/audio_processing/beamformer/nonlinear_beamformer.cc
    modules/audio_processing/echo_cancellation_impl.cc
    modules/audio_processing/echo_control_mobile_impl.cc
    modules/audio_processing/gain_control_for_experimental_agc.cc
    modules/audio_processing/gain_control_impl.cc
    modules/audio_processing/high_pass_filter_impl.cc
    modules/audio_processing/intelligibility/intelligibility_enhancer.cc
    modules/audio_processing/intelligibility/intelligibility_utils.cc
    modules/audio_processing/level_estimator_impl.cc
    modules/audio_processing/logging/aec_logging_file_handling.cc
    modules/audio_processing/noise_suppression_impl.cc
    modules/audio_processing/ns/noise_suppression.c
    modules/audio_processing/ns/ns_core.c
    modules/audio_processing/rms_level.cc
    modules/audio_processing/splitting_filter.cc
    modules/audio_processing/three_band_filter_bank.cc
    modules/audio_processing/transient/file_utils.cc
    modules/audio_processing/transient/moving_moments.cc
    modules/audio_processing/transient/transient_detector.cc
    modules/audio_processing/transient/transient_suppressor.cc
    modules/audio_processing/transient/wpd_node.cc
    modules/audio_processing/transient/wpd_tree.cc
    modules/audio_processing/typing_detection.cc
    modules/audio_processing/utility/block_mean_calculator.cc
    modules/audio_processing/utility/delay_estimator.cc
    modules/audio_processing/utility/delay_estimator_wrapper.cc
    modules/audio_processing/vad/gmm.cc
    modules/audio_processing/vad/pitch_based_vad.cc
    modules/audio_processing/vad/pitch_internal.cc
    modules/audio_processing/vad/pole_zero_filter.cc
    modules/audio_processing/vad/standalone_vad.cc
    modules/audio_processing/vad/vad_audio_proc.cc
    modules/audio_processing/vad/vad_circular_buffer.cc
    modules/audio_processing/vad/voice_activity_detector.cc
    modules/audio_processing/voice_detection_impl.cc

    modules/audio_processing/test/test_utils.cc

    system_wrappers/source/aligned_malloc.cc
    system_wrappers/source/cpu_features.cc
    system_wrappers/source/file_impl.cc
    system_wrappers/source/logging.cc
    system_wrappers/source/metrics_default.cc
    system_wrappers/source/rw_lock.cc
    system_wrappers/source/rw_lock_posix.cc
    system_wrappers/source/trace_impl.cc
    system_wrappers/source/trace_posix.cc
)

# prepend_path(<var> <prefix> <item>...)
# Stores in <var> (in the caller's scope) the list of items, each rewritten
# as "<prefix>/<item>".
function(prepend_path var prefix)
   set(listVar "")
   foreach(f ${ARGN})
      list(APPEND listVar "${prefix}/${f}")
   endforeach(f)
   set(${var} "${listVar}" PARENT_SCOPE)
endfunction(prepend_path)

prepend_path(WEBRTC_SRC webrtc/webrtc ${WEBRTC_SRC_})

add_executable(webrtc-bf nonlinear_beamformer_test.cc ${WEBRTC_SRC})
target_link_libraries(webrtc-bf gflags pthread)

# Per-configuration C++ flags, seeded from the CXXFLAGS environment variable.
SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb")
SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")

爲了省事,將相關的文件全部加上了。然後在ubuntu下編譯,首先新建build,然後在build目錄裏執行cmake ..以及make命令

# Out-of-source build: create build/, configure from inside it, then compile.
mkdir build
cd build
cmake ..
make

make成功後會出現如下輸出:[圖片:在這裏插入圖片描述]
此時在build目錄下生成了可執行文件webrtc-bf:[圖片:在這裏插入圖片描述]
然後使用如下命令,

./webrtc-bf -i input.wav -mic_positions "x1 y1 z1 x2 y2 z2" -o output.wav

如果輸入的是兩通道的語音,那麼-mic_positions後面的座標就是兩個麥克風的座標,多通道的話,以此類推。因爲carmix.wav是一個雙通道的數據,所以將麥克風的座標設置爲0.02 0 0 -0.02 0 0,座標原點就是兩個麥克風的中心位置。
在這裏插入圖片描述
這樣就運行成功了。相關的文件已經上傳到github了:https://github.com/ctwgL/webrtc-beamforming
本文的介紹到此就結束了,webrtc裏的波束模塊具體的代碼細節,技術原理以及處理的效果將會在下篇文章中介紹。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章