Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
8 changes: 8 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
pkg/provider/resources/*.h264 filter=lfs diff=lfs merge=lfs -text
pkg/provider/resources/*.ivf filter=lfs diff=lfs merge=lfs -text
pkg/provider/resources/*.ogg filter=lfs diff=lfs merge=lfs -text

# Vendored C/C++ source (WebRTC APM, PortAudio, abseil, pffft, rnnoise)
pkg/apm/webrtc/**/*.cc linguist-vendored
pkg/apm/webrtc/**/*.c linguist-vendored
pkg/apm/webrtc/**/*.h linguist-vendored
pkg/apm/webrtc/**/*.m linguist-vendored
pkg/apm/webrtc/**/*.inc linguist-vendored
pkg/portaudio/pa_src/** linguist-vendored
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "pkg/portaudio/pa_src"]
path = pkg/portaudio/pa_src
url = https://github.com/PortAudio/portaudio.git
182 changes: 182 additions & 0 deletions pkg/apm/apm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
//go:build console

// Package apm provides Go bindings for the WebRTC Audio Processing Module (APM).
// It supports echo cancellation (AEC3), noise suppression, automatic gain control,
// and high-pass filtering. Audio must be 48kHz int16 PCM in 10ms frames (480 samples/channel).
package apm

// #include "bridge.h"
import "C"

import (
"errors"
"runtime"
"unsafe"
)

// APMConfig selects which processing stages NewAPM enables and carries the
// requested channel counts.
type APMConfig struct {
// EchoCanceller enables echo cancellation.
EchoCanceller bool
// GainController enables automatic gain control.
GainController bool
// HighPassFilter enables the high-pass filter.
HighPassFilter bool
// NoiseSuppressor enables noise suppression.
NoiseSuppressor bool
// CaptureChannels is the capture (microphone) channel count; NewAPM
// substitutes 1 when it is 0.
CaptureChannels int
// RenderChannels is the render (playback) channel count; NewAPM
// substitutes 1 when it is 0.
RenderChannels int
}

// DefaultConfig returns an APMConfig with every processing stage switched on
// and a single capture and a single render channel.
func DefaultConfig() APMConfig {
	var cfg APMConfig

	cfg.EchoCanceller = true
	cfg.GainController = true
	cfg.HighPassFilter = true
	cfg.NoiseSuppressor = true

	cfg.CaptureChannels = 1
	cfg.RenderChannels = 1

	return cfg
}

// APM wraps a native audio processing instance created by NewAPM.
type APM struct {
// handle is the native instance returned by apm_create. Close resets it to
// nil, which the methods use to detect a closed APM.
handle C.ApmHandle
}

// NewAPM creates a native audio processing instance configured from config.
//
// A zero CaptureChannels or RenderChannels is replaced by 1 before the values
// are handed to the native constructor. The returned APM has a finalizer that
// calls Close, but callers should still call Close explicitly to release the
// native instance promptly.
//
// It returns an error when the native constructor returns a nil handle.
func NewAPM(config APMConfig) (*APM, error) {
	captureChannels, renderChannels := config.CaptureChannels, config.RenderChannels
	if captureChannels == 0 {
		captureChannels = 1
	}
	if renderChannels == 0 {
		renderChannels = 1
	}

	// status receives the native error code; only the nil handle is used to
	// detect failure.
	var status C.int
	h := C.apm_create(
		boolToInt(config.EchoCanceller),
		boolToInt(config.GainController),
		boolToInt(config.HighPassFilter),
		boolToInt(config.NoiseSuppressor),
		C.int(captureChannels),
		C.int(renderChannels),
		&status,
	)
	if h == nil {
		return nil, errors.New("apm: failed to create audio processing module")
	}

	apm := &APM{handle: h}
	// Safety net for callers that forget Close.
	runtime.SetFinalizer(apm, (*APM).Close)
	return apm, nil
}

// ProcessCapture processes a 10ms capture (microphone) frame in-place.
//
// samples must contain exactly 480 * numChannels interleaved int16 values
// (10ms at 48kHz); the channel count is derived from len(samples). An empty
// slice is a no-op. Any other length that is not a multiple of 480 is rejected
// with an error, because the native code always reads and writes 480 samples
// per channel and would otherwise run past the end of the slice or leave a
// tail unprocessed.
//
// It returns an error if the APM is closed, the length is invalid, or the
// native call reports a non-zero status.
func (a *APM) ProcessCapture(samples []int16) error {
	// Samples per channel in one 10ms frame at 48kHz.
	const samplesPerChannel = 480

	if a.handle == nil {
		return errors.New("apm: closed")
	}
	if len(samples) == 0 {
		return nil
	}
	if len(samples)%samplesPerChannel != 0 {
		return errors.New("apm: ProcessCapture: len(samples) must be a multiple of 480")
	}

	ret := C.apm_process_capture(
		a.handle,
		(*C.int16_t)(unsafe.Pointer(&samples[0])),
		C.int(len(samples)/samplesPerChannel),
	)
	// NewAPM installs a finalizer that destroys the handle. Keep a reachable
	// until the native call has returned so the finalizer cannot run mid-call.
	runtime.KeepAlive(a)
	if ret != 0 {
		return errors.New("apm: ProcessCapture failed")
	}
	return nil
}

// ProcessRender processes a 10ms render (speaker/far-end) frame in-place.
// This feeds the echo canceller with the signal being played back.
//
// samples must contain exactly 480 * numChannels interleaved int16 values
// (10ms at 48kHz); the channel count is derived from len(samples). An empty
// slice is a no-op. Any other length that is not a multiple of 480 is rejected
// with an error, because the native code always reads and writes 480 samples
// per channel and would otherwise run past the end of the slice or leave a
// tail unprocessed.
//
// It returns an error if the APM is closed, the length is invalid, or the
// native call reports a non-zero status.
func (a *APM) ProcessRender(samples []int16) error {
	// Samples per channel in one 10ms frame at 48kHz.
	const samplesPerChannel = 480

	if a.handle == nil {
		return errors.New("apm: closed")
	}
	if len(samples) == 0 {
		return nil
	}
	if len(samples)%samplesPerChannel != 0 {
		return errors.New("apm: ProcessRender: len(samples) must be a multiple of 480")
	}

	ret := C.apm_process_render(
		a.handle,
		(*C.int16_t)(unsafe.Pointer(&samples[0])),
		C.int(len(samples)/samplesPerChannel),
	)
	// NewAPM installs a finalizer that destroys the handle. Keep a reachable
	// until the native call has returned so the finalizer cannot run mid-call.
	runtime.KeepAlive(a)
	if ret != 0 {
		return errors.New("apm: ProcessRender failed")
	}
	return nil
}

// SetStreamDelayMs sets the delay in milliseconds between the far-end signal
// being rendered and arriving at the near-end microphone.
// It is a no-op on a closed APM.
func (a *APM) SetStreamDelayMs(ms int) {
	if a.handle == nil {
		return
	}
	C.apm_set_stream_delay_ms(a.handle, C.int(ms))
	// NewAPM installs a finalizer that destroys the handle. Keep a reachable
	// until the native call has returned so the finalizer cannot run mid-call.
	runtime.KeepAlive(a)
}

// StreamDelayMs returns the stream delay in milliseconds reported by the
// native instance, or 0 if the APM is closed.
func (a *APM) StreamDelayMs() int {
	if a.handle == nil {
		return 0
	}
	ms := int(C.apm_stream_delay_ms(a.handle))
	// NewAPM installs a finalizer that destroys the handle. Keep a reachable
	// until the native call has returned so the finalizer cannot run mid-call.
	runtime.KeepAlive(a)
	return ms
}

// Stats holds AEC statistics from the WebRTC APM.
//
// Each Has* flag reports whether the native side supplied the corresponding
// value; when a flag is false the matching value field is zero.
type Stats struct {
EchoReturnLoss float64 // ERL in dB (higher = more echo removed)
EchoReturnLossEnhancement float64 // ERLE in dB (higher = better cancellation)
DivergentFilterFraction float64 // 0-1, fraction of time filter is divergent
DelayMs int // Estimated echo path delay
ResidualEchoLikelihood float64 // 0-1, likelihood of residual echo
HasERL bool // EchoReturnLoss is populated
HasERLE bool // EchoReturnLossEnhancement is populated
HasDelay bool // DelayMs is populated
HasResidualEcho bool // ResidualEchoLikelihood is populated
HasDivergent bool // DivergentFilterFraction is populated
}

// GetStats returns the current AEC statistics.
// A closed APM yields the zero Stats value, in which every Has* flag is false.
func (a *APM) GetStats() Stats {
	if a.handle == nil {
		return Stats{}
	}
	var cs C.ApmStats
	C.apm_get_stats(a.handle, &cs)
	// NewAPM installs a finalizer that destroys the handle. Keep a reachable
	// until the native call has returned so the finalizer cannot run mid-call.
	runtime.KeepAlive(a)
	return Stats{
		EchoReturnLoss:            float64(cs.echo_return_loss),
		EchoReturnLossEnhancement: float64(cs.echo_return_loss_enhancement),
		DivergentFilterFraction:   float64(cs.divergent_filter_fraction),
		DelayMs:                   int(cs.delay_ms),
		ResidualEchoLikelihood:    float64(cs.residual_echo_likelihood),
		HasERL:                    cs.has_erl != 0,
		HasERLE:                   cs.has_erle != 0,
		HasDelay:                  cs.has_delay != 0,
		HasResidualEcho:           cs.has_residual_echo != 0,
		HasDivergent:              cs.has_divergent != 0,
	}
}

// Close destroys the native APM instance and marks the APM as closed.
// Calling Close on an already closed APM does nothing.
func (a *APM) Close() {
	if a.handle == nil {
		return
	}
	C.apm_destroy(a.handle)
	a.handle = nil
}

// boolToInt converts a Go bool to the C int flag used by the bridge:
// 1 for true, 0 for false.
func boolToInt(b bool) C.int {
	var flag C.int
	if b {
		flag = 1
	}
	return flag
}
97 changes: 97 additions & 0 deletions pkg/apm/bridge.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#include "bridge.h"

#include "api/audio/builtin_audio_processing_builder.h"
#include "api/environment/environment_factory.h"
#include "api/scoped_refptr.h"
#include "modules/audio_processing/include/audio_processing.h"

#include <memory>

// Heap-allocated object behind an ApmHandle: apm_create allocates it with new
// and apm_destroy deletes it.
struct ApmInstance {
// Reference-counted pointer to the AudioProcessing instance built in apm_create.
webrtc::scoped_refptr<webrtc::AudioProcessing> apm;
};

extern "C" {

// Builds and initializes an AudioProcessing instance and returns it wrapped in
// a heap-allocated ApmInstance.
//
// echo, gain, hpf and ns are boolean flags (non-zero enables the stage). The
// gain flag drives gain_controller2; gain_controller1 is always disabled. When
// noise suppression is enabled its level is set to kHigh. capture_ch and
// render_ch are currently unused.
//
// Returns nullptr and sets *err to -1 if the builder yields no instance;
// otherwise sets *err to 0. err may be null.
ApmHandle apm_create(int echo, int gain, int hpf, int ns,
                     int capture_ch, int render_ch, int* err) {
  static_cast<void>(capture_ch);
  static_cast<void>(render_ch);

  auto processor = webrtc::BuiltinAudioProcessingBuilder().Build(
      webrtc::CreateEnvironment());
  if (!processor) {
    if (err != nullptr) {
      *err = -1;
    }
    return nullptr;
  }

  const bool suppress_noise = ns != 0;

  webrtc::AudioProcessing::Config cfg;
  cfg.echo_canceller.enabled = echo != 0;
  cfg.gain_controller1.enabled = false;
  cfg.gain_controller2.enabled = gain != 0;
  cfg.high_pass_filter.enabled = hpf != 0;
  cfg.noise_suppression.enabled = suppress_noise;
  if (suppress_noise) {
    cfg.noise_suppression.level =
        webrtc::AudioProcessing::Config::NoiseSuppression::kHigh;
  }

  processor->ApplyConfig(cfg);
  processor->Initialize();

  ApmInstance* instance = new ApmInstance{std::move(processor)};
  if (err != nullptr) {
    *err = 0;
  }
  return instance;
}

// Frees the ApmInstance behind h. A null handle is accepted and ignored.
void apm_destroy(ApmHandle h) {
  // delete on a null pointer is a no-op, so no explicit null test is needed.
  ApmInstance* instance = static_cast<ApmInstance*>(h);
  delete instance;
}

// Runs ProcessStream in-place over one 10ms capture frame.
//
// samples must hold 480 * num_channels int16 values (10ms at 48kHz).
// Returns the ProcessStream result (0 on success), or -1 when h or samples is
// null or num_channels is not positive.
int apm_process_capture(ApmHandle h, int16_t* samples, int num_channels) {
  // Reject bad arguments up front, matching the null handling in
  // apm_get_stats and apm_destroy, instead of dereferencing them.
  if (!h || !samples || num_channels <= 0) {
    return -1;
  }
  auto* inst = static_cast<ApmInstance*>(h);
  // 10ms at 48kHz = 480 samples per channel
  webrtc::StreamConfig stream_cfg(48000, num_channels);
  return inst->apm->ProcessStream(samples, stream_cfg, stream_cfg, samples);
}

// Runs ProcessReverseStream in-place over one 10ms render (far-end) frame.
//
// samples must hold 480 * num_channels int16 values (10ms at 48kHz).
// Returns the ProcessReverseStream result (0 on success), or -1 when h or
// samples is null or num_channels is not positive.
int apm_process_render(ApmHandle h, int16_t* samples, int num_channels) {
  // Reject bad arguments up front, matching the null handling in
  // apm_get_stats and apm_destroy, instead of dereferencing them.
  if (!h || !samples || num_channels <= 0) {
    return -1;
  }
  auto* inst = static_cast<ApmInstance*>(h);
  // 10ms at 48kHz = 480 samples per channel
  webrtc::StreamConfig stream_cfg(48000, num_channels);
  return inst->apm->ProcessReverseStream(samples, stream_cfg, stream_cfg, samples);
}

// Forwards delay_ms to AudioProcessing::set_stream_delay_ms.
// A null handle is ignored, matching apm_get_stats and apm_destroy.
void apm_set_stream_delay_ms(ApmHandle h, int delay_ms) {
  if (!h) {
    return;
  }
  auto* inst = static_cast<ApmInstance*>(h);
  inst->apm->set_stream_delay_ms(delay_ms);
}

// Returns AudioProcessing::stream_delay_ms() for the instance behind h,
// or 0 when h is null.
int apm_stream_delay_ms(ApmHandle h) {
  if (!h) {
    return 0;
  }
  auto* inst = static_cast<ApmInstance*>(h);
  return inst->apm->stream_delay_ms();
}

// Copies the optional fields of GetStatistics() into *out.
//
// For each statistic the has_* flag is 1 when the value is present and 0
// otherwise; an absent value is written as zero. Does nothing when h or out
// is null.
void apm_get_stats(ApmHandle h, ApmStats* out) {
  if (h == nullptr || out == nullptr) {
    return;
  }

  const auto stats = static_cast<ApmInstance*>(h)->apm->GetStatistics();

  const auto& erl = stats.echo_return_loss;
  out->has_erl = erl.has_value() ? 1 : 0;
  out->echo_return_loss = erl.value_or(0.0);

  const auto& erle = stats.echo_return_loss_enhancement;
  out->has_erle = erle.has_value() ? 1 : 0;
  out->echo_return_loss_enhancement = erle.value_or(0.0);

  const auto& divergent = stats.divergent_filter_fraction;
  out->has_divergent = divergent.has_value() ? 1 : 0;
  out->divergent_filter_fraction = divergent.value_or(0.0);

  const auto& delay = stats.delay_ms;
  out->has_delay = delay.has_value() ? 1 : 0;
  out->delay_ms = delay.value_or(0);

  const auto& residual = stats.residual_echo_likelihood;
  out->has_residual_echo = residual.has_value() ? 1 : 0;
  out->residual_echo_likelihood = residual.value_or(0.0);
}

} // extern "C"
16 changes: 16 additions & 0 deletions pkg/apm/bridge.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//go:build console

package apm

// #cgo CXXFLAGS: -I${SRCDIR}/webrtc -I${SRCDIR}/webrtc/third_party/abseil-cpp -std=c++17 -fno-rtti -DWEBRTC_APM_DEBUG_DUMP=0 -DWEBRTC_AUDIO_PROCESSING_ONLY_BUILD -DNDEBUG -Wno-unused-parameter -Wno-missing-field-initializers -Wno-sign-compare -Wno-deprecated-declarations -Wno-nullability-completeness -Wno-shorten-64-to-32
// #cgo darwin CXXFLAGS: -DWEBRTC_MAC -DWEBRTC_POSIX
// #cgo linux CXXFLAGS: -DWEBRTC_LINUX -DWEBRTC_POSIX
// #cgo windows CXXFLAGS: -DWEBRTC_WIN
// #cgo arm64 CXXFLAGS: -DWEBRTC_HAS_NEON -DWEBRTC_ARCH_ARM64
// #cgo darwin LDFLAGS: -lc++
// #cgo linux LDFLAGS: -lc++ -lm -lpthread
// #cgo windows LDFLAGS: -lc++
// #include "bridge.h"
import "C"

import _ "github.com/livekit/livekit-cli/v2/pkg/apm/webrtc"
52 changes: 52 additions & 0 deletions pkg/apm/bridge.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#ifndef LK_APM_BRIDGE_H
#define LK_APM_BRIDGE_H

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Opaque handle to an APM instance: returned by apm_create and released with
// apm_destroy.
typedef void* ApmHandle;

// Create an APM instance.
// echo, gain, hpf and ns are flags: non-zero enables the corresponding stage.
// Returns NULL on error and sets *err to non-zero; on success sets *err to 0.
// err may be NULL.
// NOTE(review): the implementation currently ignores capture_ch and render_ch.
ApmHandle apm_create(int echo, int gain, int hpf, int ns,
int capture_ch, int render_ch, int* err);

// Destroy an APM instance. Passing NULL is a no-op.
void apm_destroy(ApmHandle h);

// Process a 10ms capture frame in-place. Returns 0 on success.
// The frame is treated as 48kHz audio with num_channels channels, i.e.
// 480 samples per channel.
int apm_process_capture(ApmHandle h, int16_t* samples, int num_channels);

// Process a 10ms render (far-end/playback) frame in-place. Returns 0 on success.
// The frame is treated as 48kHz audio with num_channels channels, i.e.
// 480 samples per channel.
int apm_process_render(ApmHandle h, int16_t* samples, int num_channels);

// Set the stream delay in milliseconds for echo cancellation.
// No status is returned to the caller.
void apm_set_stream_delay_ms(ApmHandle h, int delay_ms);

// Get the current stream delay in milliseconds, as reported by the underlying
// AudioProcessing instance.
int apm_stream_delay_ms(ApmHandle h);

// AEC statistics returned by apm_get_stats.
// Each has_* flag is 1 when the statistic that follows it was available and 0
// otherwise; an unavailable statistic is written as 0.
typedef struct {
int has_erl;
double echo_return_loss; // ERL in dB
int has_erle;
double echo_return_loss_enhancement; // ERLE in dB
int has_divergent;
double divergent_filter_fraction;
int has_delay;
int delay_ms;
int has_residual_echo;
double residual_echo_likelihood;
} ApmStats;

// Get current AEC statistics.
// Fills every field of *out. If h or out is NULL the call returns without
// writing anything, so *out keeps whatever it held before.
void apm_get_stats(ApmHandle h, ApmStats* out);

#ifdef __cplusplus
}
#endif

#endif // LK_APM_BRIDGE_H
Loading
Loading