From 14ee26e546bc3f41b8bbe0e80dcc32f9cd6039bd Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Tue, 15 Apr 2025 11:06:11 +0200 Subject: [PATCH 1/2] split up gain into two stages --- examples/TEMPLATE.yml | 12 ++++-- .../audiotools/__init__.py | 40 +++++++++++++++---- .../audiotools/convert/__init__.py | 28 +++++++++---- ivas_processing_scripts/processing/chains.py | 8 ++-- 4 files changed, 65 insertions(+), 23 deletions(-) diff --git a/examples/TEMPLATE.yml b/examples/TEMPLATE.yml index 21d2797d..91c7d36b 100755 --- a/examples/TEMPLATE.yml +++ b/examples/TEMPLATE.yml @@ -77,12 +77,14 @@ input: ### Pre-processing step performed prior to core processing for all conditions ### If not defined, preprocessing step is skipped # preprocessing: - ### Linear gain factor to be applied before any other processing - # gain: 3.1622776602 ### Target format used in rendering from input format; default = null (no rendering) # fmt: "7_1_4" ### Define mask (HP50 or 20KBP) for input signal filtering; default = null # mask: "HP50" + ### Gain factor to be applied BEFORE any other processing (linear, or add dB suffix) + # gain_pre: 10 dB + ### Gain factor to be applied AFTER any other processing (linear, or add dB suffix) + # gain_post: 3.1622776602 ### Target sampling rate in Hz for resampling; default = null (no resampling) # fs: 16000 ### Target loudness in LKFS; default = null (no loudness change applied) @@ -321,8 +323,10 @@ postprocessing: fmt: "BINAURAL" ### REQUIRED: Target sampling rate in Hz for resampling fs: 48000 - ### Linear gain factor to be applied before any other processing - # gain: 0.316227766 + ### Gain factor to be applied BEFORE any other processing (linear, or add dB suffix) + # gain_pre: 10 dB + ### Gain factor to be applied AFTER any other processing (linear, or add dB suffix) + # gain_post: 3.1622776602 ### Low-pass cut-off frequency in Hz; default = null (no filtering) # lp_cutoff: 24000 ### Target loudness in LKFS; default = null (no loudness change applied) diff --git a/ivas_processing_scripts/audiotools/__init__.py b/ivas_processing_scripts/audiotools/__init__.py index 07598432..c8f6b170 100755 --- a/ivas_processing_scripts/audiotools/__init__.py +++ b/ivas_processing_scripts/audiotools/__init__.py @@ -51,6 +51,23 @@ def add_processing_args(group, input=True): p = "out" ps = "o" + # validation function(s) + def parse_gain(g: str) -> float: + g = g.strip() + try: + if g.lower().endswith("db"): + g = float(g[:-2].strip()) + g = 10 ** (g / 20) + else: + g = float(g) + + except ValueError: + raise argparse.ArgumentTypeError( + f"Invalid gain value '{g}' specified. Must be a number or a number suffixed with dB" + ) + + return g + group.add_argument( f"-{ps}", f"--{p}", @@ -93,6 +110,20 @@ def add_processing_args(group, input=True): help="Window the start/end of the signal by this amount in milliseconds (default = %(default)s)", default=None, ) + group.add_argument( + f"-{ps}gi", + f"--{p}_gain_pre", + type=parse_gain, + help="Apply the given pre-gain factor to the signal (suffix with dB to use nonlinear, e.g. '10 dB'), applied BEFORE other processing steps (default = %(default)s)", + default=None, + ) + group.add_argument( + f"-{ps}go", + f"--{p}_gain_post", + type=parse_gain, + help="Apply the given post-gain factor to the signal (suffix with dB to use nonlinear, e.g. '10 dB'), applied AFTER other processing steps (default = %(default)s)", + default=None, + ) group.add_argument( f"-{ps}t", f"--{p}_trim", @@ -114,13 +145,6 @@ def add_processing_args(group, input=True): help="Delay the signal by this amount in milliseconds (negative values advance, default = %(default)s)", default=None, ) - group.add_argument( - f"-{ps}g", - f"--{p}_gain", - type=float, - help="Apply the given linear gain factor to the signal, applied before other processing steps (default = %(default)s)", - default=None, - ) group.add_argument( f"-{ps}l", f"--{p}_loudness", @@ -256,6 +280,8 @@ def main(): ) elif args.input is not None: + # validate other arguments + if not args.out_fs: args.out_fs = args.in_fs diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 300f672f..374d29c2 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -190,7 +190,8 @@ def convert( in_cutoff: Optional[int] = None, in_mask: Optional[str] = None, in_window: Optional[list] = None, - in_gain: Optional[float] = None, + in_gain_pre: Optional[float] = None, + in_gain_post: Optional[float] = None, in_loudness: Optional[float] = None, in_loudness_fmt: Optional[str] = None, out_trim: Optional[list] = None, @@ -200,6 +201,8 @@ def convert( out_cutoff: Optional[int] = None, out_mask: Optional[str] = None, out_window: Optional[list] = None, + out_gain_pre: Optional[float] = None, + out_gain_post: Optional[float] = None, out_gain: Optional[float] = None, out_loudness: Optional[float] = None, out_loudness_fmt: Optional[str] = None, @@ -223,7 +226,8 @@ def convert( fc=in_cutoff, mask=in_mask, window=in_window, - gain=in_gain, + gain_pre=in_gain_pre, + gain_post=in_gain_post, loudness=in_loudness, loudness_fmt=in_loudness_fmt, spatial_distortion_amplitude=spatial_distortion_amplitude, @@ -244,7 +248,8 @@ def convert( fc=out_cutoff, mask=out_mask, window=out_window, - gain=out_gain, + gain_pre=out_gain_pre, + gain_post=out_gain_post, loudness=out_loudness, loudness_fmt=out_loudness_fmt, limit=limit, @@ -263,7 +268,8 @@ def process_audio( fc: Optional[int] = None, mask: Optional[str] = None, window: Optional[float] = None, - gain: Optional[float] = None, + gain_pre: Optional[float] = None, + gain_post: Optional[float] = None, loudness: Optional[float] = None, loudness_fmt: Optional[str] = None, limit: Optional[bool] = False, @@ -278,11 +284,11 @@ def process_audio( if fs is None: fs = x.fs - """gain""" - if gain is not None: + """pre-gain""" + if gain_pre is not None: if logger: - logger.debug(f"Applying linear gain factor of {gain}") - x.audio *= gain + logger.debug(f"Applying linear pre-gain factor of {gain_pre}") + x.audio *= gain_pre """delay audio""" if delay is not None: @@ -358,6 +364,12 @@ def process_audio( ) x.audio, _ = loudness_norm(x, loudness, loudness_fmt, logger=logger) + """post-gain""" + if gain_post is not None: + if logger: + logger.debug(f"Applying linear pre-gain factor of {gain_post}") + x.audio *= gain_post + """limiting""" if limit: if logger: diff --git a/ivas_processing_scripts/processing/chains.py b/ivas_processing_scripts/processing/chains.py index bd315e0b..111c9204 100755 --- a/ivas_processing_scripts/processing/chains.py +++ b/ivas_processing_scripts/processing/chains.py @@ -137,10 +137,11 @@ def get_preprocessing(cfg: TestConfig) -> dict: "in_pad_noise": pre_cfg.get("pad_noise", False), "in_delay": pre_cfg.get("delay"), "in_window": pre_cfg.get("window"), - "in_gain": pre_cfg.get("gain"), "in_loudness": pre_cfg.get("loudness"), "in_loudness_fmt": pre_cfg.get("loudness_fmt", post_fmt), "in_mask": pre_cfg.get("mask", None), + "in_gain_pre": pre_cfg.get("gain_pre"), + "out_gain_post": pre_cfg.get("gain_post"), "multiprocessing": cfg.multiprocessing, } ) @@ -566,12 +567,11 @@ def get_processing_chain( { "in_fs": tmp_in_fs, "in_fmt": tmp_in_fmt, + "in_gain_pre": post_cfg.get("gain_pre"), + "out_gain_post": post_cfg.get("gain_post"), "out_fs": post_cfg.get("fs"), "out_fmt": post_fmt, "out_cutoff": tmp_lp_cutoff, - "in_gain": post_cfg.get( - "gain" - ), # should be in_gain here since we want to apply it before any conversion/rendering "bin_dataset": post_cfg.get("bin_dataset"), "bin_lfe_gain": post_cfg.get("bin_lfe_gain"), "limit": post_cfg.get("limit", True), -- GitLab From 46d5cbd5c7a877cb1d611bc8336114e182b33d49 Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Tue, 15 Apr 2025 11:38:51 +0200 Subject: [PATCH 2/2] move post gain to after limiter --- .../audiotools/convert/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 374d29c2..f1e56c43 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -364,18 +364,18 @@ def process_audio( ) x.audio, _ = loudness_norm(x, loudness, loudness_fmt, logger=logger) - """post-gain""" - if gain_post is not None: - if logger: - logger.debug(f"Applying linear pre-gain factor of {gain_post}") - x.audio *= gain_post - """limiting""" if limit: if logger: logger.debug("Applying limiter") audioarray.limiter(x.audio, x.fs) + """post-gain""" + if gain_post is not None: + if logger: + logger.debug(f"Applying linear pre-gain factor of {gain_post}") + x.audio *= gain_post + def format_conversion( input: audio.Audio, -- GitLab