From 5a00b48131a08acc820637265c4593eb7cd2dedb Mon Sep 17 00:00:00 2001 From: Simon Plain Date: Thu, 1 Jun 2023 16:58:46 +0200 Subject: [PATCH 1/2] HOA2 processing --- item_gen_configs/HOA2_CONFIG.yml | 58 +++++ .../audiotools/wrappers/reverb.py | 66 +++++- .../generation/process_hoa2_items.py | 218 ++++++++++++++++++ 3 files changed, 341 insertions(+), 1 deletion(-) create mode 100644 item_gen_configs/HOA2_CONFIG.yml create mode 100644 ivas_processing_scripts/generation/process_hoa2_items.py diff --git a/item_gen_configs/HOA2_CONFIG.yml b/item_gen_configs/HOA2_CONFIG.yml new file mode 100644 index 00000000..7e41740e --- /dev/null +++ b/item_gen_configs/HOA2_CONFIG.yml @@ -0,0 +1,58 @@ +--- +################################################ +# General configuration +################################################ + +### Output format +format: "HOA2" + +### Output sampling rate in Hz needed for headerless audio files; default = 48000 +fs: 48000 + +### IR sampling rate in Hz needed for headerless audio files; default = 48000 +IR_fs: 48000 + +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! 
This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + +### Input path to mono files +input_path: "./items_mono" + +### Input path to HOA2 impulse response files, default = './ivas_processing_scripts/generation/IR' +IR_path: "./IRs" + +### Output path for generated test items and metadata files +output_path: "./items_HOA2" + +### Target loudness in LKFS; default = null (no loudness normalization applied) +loudness: -26 + +### Pre-amble and Post-amble length in seconds (default = 0.0) +preamble: 0.5 +postamble: 1.0 + +### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) +add_low_level_random_noise: true + + +################################################ +### Scene description +################################################ + +### Each scene must start with the sceneN tag +### Specify the mono source filename (the program will search for it in the input_path folder) +### Specify the HOA2 IR source filename (the program will search for it in the IR_path folder) +### Specify the overlap length in seconds for each input source (negative value creates a gap) +### Note 1: use [val1, val2, ...] 
for multiple sources in a scene +### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames + +scenes: + a1: + name: "out.wav" + description: "" + source: ["fa1.wav", "ma1.wav"] + IR: ["IR_HOA2_env1/FreefieldFloor_TalkPos1_EigenHoA2_SinSweep_9chn.wav", "IR_HOA2_env1/FreefieldFloor_TalkPos2_EigenHoA2_SinSweep_9chn.wav"] + overlap: -0.2 + diff --git a/ivas_processing_scripts/audiotools/wrappers/reverb.py b/ivas_processing_scripts/audiotools/wrappers/reverb.py index 0498e948..fc37b0f6 100644 --- a/ivas_processing_scripts/audiotools/wrappers/reverb.py +++ b/ivas_processing_scripts/audiotools/wrappers/reverb.py @@ -238,7 +238,7 @@ def reverb_foa( H = fft(foa_IR.audio, axis=0) align = 1.0 / np.max(np.abs(H)) - # convolve mono input with left and right IR + # convolve mono input with FOA IR y_w = reverb(input, IR_w, align=align) y_x = reverb(input, IR_x, align=align) y_y = reverb(input, IR_y, align=align) @@ -251,3 +251,67 @@ def reverb_foa( y.audio = np.column_stack([y_w.audio, y_x.audio, y_y.audio, y_z.audio]) return y + + +def reverb_hoa2( + input: Audio, + hoa2_IR: Audio, + align: Optional[float] = None, +) -> Audio: + """ + Wrapper for the ITU-T reverb binary to convolve mono audio signal with an HOA2 impulse response + + Parameters + ---------- + input: Audio + Input audio signal + IR: Audio + Impulse response + align: float + multiplicative factor to apply to the reverberated sound in order to align its energy level with the second file + + Returns + ------- + output: Audio + Convolved audio signal with HOA2 IR + """ + + # convert to float32 + hoa2_IR.audio = np.float32(hoa2_IR.audio) + + numchannels = 9 # HOA2 by definition + + # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB + if align is None: + H = fft(hoa2_IR.audio, axis=0) + align = 1.0 / np.max(np.abs(H)) + + IR = copy(hoa2_IR) + IR.name = "MONO" + IR.num_channels = 1 + ych = [] + for i 
in range(numchannels): + # separate IR into each channel + IR.audio = np.reshape(hoa2_IR.audio[:, i], (-1, 1)) + # convolve mono input with channel IR + ych.append(reverb(input, IR, align=align)) + + # combine into hoa2 output + y = copy(input) + y.name = "HOA2" + y.num_channels = numchannels + y.audio = np.column_stack( + [ + ych[0].audio, + ych[1].audio, + ych[2].audio, + ych[3].audio, + ych[4].audio, + ych[5].audio, + ych[6].audio, + ych[7].audio, + ych[8].audio, + ] + ) + + return y diff --git a/ivas_processing_scripts/generation/process_hoa2_items.py b/ivas_processing_scripts/generation/process_hoa2_items.py new file mode 100644 index 00000000..80bde6e8 --- /dev/null +++ b/ivas_processing_scripts/generation/process_hoa2_items.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. 
This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+# + +import logging +import os +from math import floor + +import numpy as np + +from ivas_processing_scripts.audiotools import audio, audiofile +from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness +from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_hoa2 +from ivas_processing_scripts.generation import config + +SEED_RANDOM_NOISE = 0 + + +# function for converting nd numpy array to strings with 2 decimal digits +def csv_formatdata(data): + for row in data: + yield ["%0.2f" % v for v in row] + + +def generate_hoa2_items( + cfg: config.TestConfig, + logger: logging.Logger, +): + """Generate HOA2 items from mono items based on scene description""" + + # get the number of scenes + N_scenes = len(cfg.scenes) + + # set the target level + if "loudness" not in cfg.__dict__: + cfg.loudness = -26 + + # set the fs + if "fs" not in cfg.__dict__: + cfg.fs = 48000 + + # set the IR fs + if "IR_fs" not in cfg.__dict__: + cfg.IR_fs = 48000 + + # set the pre-amble and post-amble + if "preamble" not in cfg.__dict__: + cfg.preamble = 0.0 + + if "postamble" not in cfg.__dict__: + cfg.postamble = 0.0 + + # set the IR path + if "IR_path" not in cfg.__dict__: + cfg.IR_path = os.path.join(os.path.dirname(__file__), "IR") + + # set the pre-amble and post-amble + if "add_low_level_random_noise" not in cfg.__dict__: + cfg.add_low_level_random_noise = False + + # repeat for all source files + for scene_name, scene in cfg.scenes.items(): + logger.info( + f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene['name']}" + ) + + # extract the number of audio sources + N_sources = len(np.atleast_1d(scene["source"])) + + # read the overlap length + if "overlap" in scene.keys(): + source_overlap = float(scene["overlap"]) + else: + source_overlap = 0.0 + + y = audio.SceneBasedAudio("HOA2") + for i in range(N_sources): + # parse parameters from the scene description + source_file = np.atleast_1d(scene["source"])[i] + IR_file = 
np.atleast_1d(scene["IR"])[i] + + logger.info(f"Convolving {source_file} with {IR_file}") + + # read source file + x = audio.fromfile( + "MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs + ) + + # read the IR file + IR = audio.fromfile( + "HOA2", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs + ) + + # convolve with HOA2 IR + x = reverb_hoa2(x, IR) + + # adjust the level of the HOA2 signal + _, scale_factor, _ = get_loudness(x, cfg.loudness, "BINAURAL") + x.audio *= scale_factor + + # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) + if i > 0 and source_overlap != 0.0: + # get the length of the first source file + N_delay = len(y.audio[:, 0]) + + # add the shift + N_delay += int(-source_overlap * x.fs) + + # insert all-zero preamble + pre = np.zeros((N_delay, x.audio.shape[1])) + x.audio = np.concatenate([pre, x.audio]) + + # pad with zeros to ensure that the signal length is a multiple of 20ms + N_frame = x.fs / 50 + if len(x.audio) % N_frame != 0: + N_pad = int(N_frame - len(x.audio) % N_frame) + + # insert all-zero preamble + pre = np.zeros((N_pad, x.audio.shape[1])) + x.audio = np.concatenate([pre, x.audio]) + + # add source signal to the array of source signals + y.fs = x.fs + if y.audio is None: + y.audio = x.audio + else: + # pad with zeros to have equal length of all source signals + if x.audio.shape[0] > y.audio.shape[0]: + y.audio = np.vstack( + ( + y.audio, + np.zeros( + ( + x.audio.shape[0] - y.audio.shape[0], + y.audio.shape[1], + ) + ), + ) + ) + elif y.audio.shape[0] > x.audio.shape[0]: + x.audio = np.vstack( + ( + x.audio, + np.zeros( + ( + y.audio.shape[0] - x.audio.shape[0], + x.audio.shape[1], + ) + ), + ) + ) + + # superimpose + y.audio += x.audio + + # append pre-amble and post-amble to all sources + if cfg.preamble != 0.0: + # ensure that pre-amble is a multiple of 20ms + N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) + + # insert all-zero preamble to all sources + 
pre = np.zeros((N_pre, y.audio.shape[1])) + y.audio = np.concatenate([pre, y.audio]) + + if cfg.postamble != 0.0: + # ensure that post-mable is a multiple of 20ms + N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) + + # append all-zero postamble to all sources + post = np.zeros((N_post, y.audio.shape[1])) + y.audio = np.concatenate([y.audio, post]) + + # add random noise + if cfg.add_low_level_random_noise: + # create uniformly distributed noise between -4 and 4 + np.random.seed(SEED_RANDOM_NOISE) + noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( + "float" + ) + + # superimpose + y.audio += noise + + # write the reverberated audio into output file + output_filename = scene["name"] + audiofile.write( + os.path.join(cfg.output_path, output_filename), y.audio, y.fs + ) # !!!! TBD: replace all os.path.xxx operations with the Path object + + return -- GitLab From 1a740cb3b4a301bb66afb0c1f0ad8617f4f9f6cc Mon Sep 17 00:00:00 2001 From: Simon Plain Date: Fri, 2 Jun 2023 09:11:01 +0200 Subject: [PATCH 2/2] Add __init__.py code and binauralize option --- item_gen_configs/HOA2_CONFIG.yml | 3 +++ item_gen_configs/P800-4.yml | 3 +++ item_gen_configs/P800-5.yml | 3 +++ ivas_processing_scripts/generation/__init__.py | 4 ++++ .../generation/process_foa_items.py | 17 ++++++++++++++++- .../generation/process_hoa2_items.py | 17 ++++++++++++++++- 6 files changed, 45 insertions(+), 2 deletions(-) diff --git a/item_gen_configs/HOA2_CONFIG.yml b/item_gen_configs/HOA2_CONFIG.yml index 7e41740e..3be0c57d 100644 --- a/item_gen_configs/HOA2_CONFIG.yml +++ b/item_gen_configs/HOA2_CONFIG.yml @@ -26,6 +26,9 @@ IR_path: "./IRs" ### Output path for generated test items and metadata files output_path: "./items_HOA2" +### (Optional) Output path for binauralized versions of the generated HOA2 items +# binaural_path: "./items_HOA2_bin" + ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 diff --git a/item_gen_configs/P800-4.yml 
b/item_gen_configs/P800-4.yml index e137a438..253bb2c3 100644 --- a/item_gen_configs/P800-4.yml +++ b/item_gen_configs/P800-4.yml @@ -26,6 +26,9 @@ IR_path: "./IRs" ### Output path for generated test items and metadata files output_path: "./items_FOA" +### (Optional) Output path for binauralized versions of the generated FOA items +# binaural_path: "./items_FOA_bin" + ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 diff --git a/item_gen_configs/P800-5.yml b/item_gen_configs/P800-5.yml index e137a438..253bb2c3 100644 --- a/item_gen_configs/P800-5.yml +++ b/item_gen_configs/P800-5.yml @@ -26,6 +26,9 @@ IR_path: "./IRs" ### Output path for generated test items and metadata files output_path: "./items_FOA" +### (Optional) Output path for binauralized versions of the generated FOA items +# binaural_path: "./items_FOA_bin" + ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 diff --git a/ivas_processing_scripts/generation/__init__.py b/ivas_processing_scripts/generation/__init__.py index 59e396b9..bbd572a3 100755 --- a/ivas_processing_scripts/generation/__init__.py +++ b/ivas_processing_scripts/generation/__init__.py @@ -43,6 +43,7 @@ from ivas_processing_scripts.constants import ( from ivas_processing_scripts.generation import ( config, process_foa_items, + process_hoa2_items, process_ism_items, process_stereo_items, ) @@ -96,6 +97,9 @@ def main(args): elif cfg.format == "FOA": # generate FOA items according to scene description process_foa_items.generate_foa_items(cfg, logger) + elif cfg.format == "HOA2": + # generate HOA2 items according to scene description + process_hoa2_items.generate_hoa2_items(cfg, logger) # copy configuration to output directory with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f: diff --git a/ivas_processing_scripts/generation/process_foa_items.py b/ivas_processing_scripts/generation/process_foa_items.py index 8c06966f..31be77e4 100644 --- 
a/ivas_processing_scripts/generation/process_foa_items.py +++ b/ivas_processing_scripts/generation/process_foa_items.py @@ -36,7 +36,7 @@ from math import floor import numpy as np -from ivas_processing_scripts.audiotools import audio, audiofile +from ivas_processing_scripts.audiotools import audio, audiofile, convert from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_foa from ivas_processing_scripts.generation import config @@ -86,6 +86,10 @@ def generate_foa_items( if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False + # setup binaural rendering + if "binaural_path" not in cfg.__dict__: + cfg.binaural_path = "" + # repeat for all source files for scene_name, scene in cfg.scenes.items(): logger.info( @@ -213,4 +217,15 @@ def generate_foa_items( os.path.join(cfg.output_path, output_filename), y.audio, y.fs ) # !!!! TBD: replace all os.path.xxx operations with the Path object + # convert to binaural if option chosen + if cfg.binaural_path != "": + binaudio = audio.fromtype("BINAURAL") + binaudio.fs = y.fs + convert.format_conversion(y, binaudio) + audiofile.write( + os.path.join(cfg.binaural_path, output_filename), + binaudio.audio, + binaudio.fs, + ) # !!!! 
TBD: replace all os.path.xxx operations with the Path object + return diff --git a/ivas_processing_scripts/generation/process_hoa2_items.py b/ivas_processing_scripts/generation/process_hoa2_items.py index 80bde6e8..b8a88adb 100644 --- a/ivas_processing_scripts/generation/process_hoa2_items.py +++ b/ivas_processing_scripts/generation/process_hoa2_items.py @@ -36,7 +36,7 @@ from math import floor import numpy as np -from ivas_processing_scripts.audiotools import audio, audiofile +from ivas_processing_scripts.audiotools import audio, audiofile, convert from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_hoa2 from ivas_processing_scripts.generation import config @@ -86,6 +86,10 @@ def generate_hoa2_items( if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False + # setup binaural rendering + if "binaural_path" not in cfg.__dict__: + cfg.binaural_path = "" + # repeat for all source files for scene_name, scene in cfg.scenes.items(): logger.info( @@ -215,4 +219,15 @@ def generate_hoa2_items( os.path.join(cfg.output_path, output_filename), y.audio, y.fs ) # !!!! TBD: replace all os.path.xxx operations with the Path object + # convert to binaural if option chosen + if cfg.binaural_path != "": + binaudio = audio.fromtype("BINAURAL") + binaudio.fs = y.fs + convert.format_conversion(y, binaudio) + audiofile.write( + os.path.join(cfg.binaural_path, output_filename), + binaudio.audio, + binaudio.fs, + ) # !!!! TBD: replace all os.path.xxx operations with the Path object + return -- GitLab