From 5a00b48131a08acc820637265c4593eb7cd2dedb Mon Sep 17 00:00:00 2001
From: Simon Plain <simon.plain@dolby.com>
Date: Thu, 1 Jun 2023 16:58:46 +0200
Subject: [PATCH 1/2] HOA2 processing

---
 item_gen_configs/HOA2_CONFIG.yml              |  58 +++++
 .../audiotools/wrappers/reverb.py             |  66 +++++-
 .../generation/process_hoa2_items.py          | 218 ++++++++++++++++++
 3 files changed, 341 insertions(+), 1 deletion(-)
 create mode 100644 item_gen_configs/HOA2_CONFIG.yml
 create mode 100644 ivas_processing_scripts/generation/process_hoa2_items.py

diff --git a/item_gen_configs/HOA2_CONFIG.yml b/item_gen_configs/HOA2_CONFIG.yml
new file mode 100644
index 00000000..7e41740e
--- /dev/null
+++ b/item_gen_configs/HOA2_CONFIG.yml
@@ -0,0 +1,58 @@
+---
+################################################
+# General configuration
+################################################
+
+### Output format
+format: "HOA2"
+
+### Output sampling rate in Hz needed for headerless audio files; default = 48000
+fs: 48000
+
+### IR sampling rate in Hz needed for headerless audio files; default = 48000
+IR_fs: 48000
+
+### Any relative paths will be interpreted relative to the working directory the script is called from!
+### Usage of absolute paths is recommended.
+### Do not use file names with dots "." in them! This is not supported, use "_" instead
+### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions
+
+### Input path to mono files
+input_path: "./items_mono"
+
+### Input path to HOA2 (9-channel) impulse response files, default = './ivas_processing_scripts/generation/IR'
+IR_path: "./IRs"
+
+### Output path for generated test items and metadata files
+output_path: "./items_HOA2"
+
+### Target loudness in LKFS; default = null (no loudness normalization applied)
+loudness: -26
+
+### Pre-amble and Post-amble length in seconds (default = 0.0)
+preamble: 0.5
+postamble: 1.0
+
+### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence)
+add_low_level_random_noise: true
+
+
+################################################
+### Scene description
+################################################
+
+### Each scene must start with the sceneN tag
+### Specify the mono source filename (the program will search for it in the input_path folder)
+### Specify the HOA2 IR source filename (the program will search for it in the IR_path folder)
+### Specify the overlap length in seconds between consecutive input sources, one value per scene (negative value creates a gap)
+### Note 1: use [val1, val2, ...] for multiple sources in a scene
+### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames
+
+scenes:
+    a1: 
+        name: "out.wav"
+        description: ""
+        source: ["fa1.wav", "ma1.wav"]
+        IR: ["IR_HOA2_env1/FreefieldFloor_TalkPos1_EigenHoA2_SinSweep_9chn.wav", "IR_HOA2_env1/FreefieldFloor_TalkPos2_EigenHoA2_SinSweep_9chn.wav"]
+        overlap: -0.2
+        
diff --git a/ivas_processing_scripts/audiotools/wrappers/reverb.py b/ivas_processing_scripts/audiotools/wrappers/reverb.py
index 0498e948..fc37b0f6 100644
--- a/ivas_processing_scripts/audiotools/wrappers/reverb.py
+++ b/ivas_processing_scripts/audiotools/wrappers/reverb.py
@@ -238,7 +238,7 @@ def reverb_foa(
         H = fft(foa_IR.audio, axis=0)
         align = 1.0 / np.max(np.abs(H))
 
-    # convolve mono input with left and right IR
+    # convolve mono input with FOA IR
     y_w = reverb(input, IR_w, align=align)
     y_x = reverb(input, IR_x, align=align)
     y_y = reverb(input, IR_y, align=align)
@@ -251,3 +251,67 @@ def reverb_foa(
     y.audio = np.column_stack([y_w.audio, y_x.audio, y_y.audio, y_z.audio])
 
     return y
+
+
+def reverb_hoa2(
+    input: Audio,
+    hoa2_IR: Audio,
+    align: Optional[float] = None,
+) -> Audio:
+    """
+    Wrapper for the ITU-T reverb binary to convolve a mono audio signal with an HOA2 impulse response
+
+    Parameters
+    ----------
+    input: Audio
+        Input (mono) audio signal
+    hoa2_IR: Audio
+        HOA2 impulse response (9 channels); its audio data is converted to float32 in place
+    align: Optional[float]
+        multiplicative factor to apply to the reverberated sound in order to align its energy level with the second file; if None, it is computed such that the maximum gain of the IR across all frequencies is 0dB
+
+    Returns
+    -------
+    output: Audio
+        Convolved audio signal with HOA2 IR (9 channels)
+    """
+
+    # convert to float32 (note: this modifies the caller's hoa2_IR object in place)
+    hoa2_IR.audio = np.float32(hoa2_IR.audio)
+
+    numchannels = 9  # HOA2 by definition
+
+    # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB
+    if align is None:
+        H = fft(hoa2_IR.audio, axis=0)
+        align = 1.0 / np.max(np.abs(H))
+
+    IR = copy(hoa2_IR)
+    IR.name = "MONO"
+    IR.num_channels = 1
+    ych = []
+    for i in range(numchannels):
+        # extract the i-th channel of the HOA2 IR as a single-column (mono) IR
+        IR.audio = np.reshape(hoa2_IR.audio[:, i], (-1, 1))
+        # convolve mono input with channel IR (the same align factor is used for all channels)
+        ych.append(reverb(input, IR, align=align))
+
+    # combine the per-channel results into the HOA2 output, one column per channel
+    y = copy(input)
+    y.name = "HOA2"
+    y.num_channels = numchannels
+    y.audio = np.column_stack(
+        [
+            ych[0].audio,
+            ych[1].audio,
+            ych[2].audio,
+            ych[3].audio,
+            ych[4].audio,
+            ych[5].audio,
+            ych[6].audio,
+            ych[7].audio,
+            ych[8].audio,
+        ]
+    )
+
+    return y
diff --git a/ivas_processing_scripts/generation/process_hoa2_items.py b/ivas_processing_scripts/generation/process_hoa2_items.py
new file mode 100644
index 00000000..80bde6e8
--- /dev/null
+++ b/ivas_processing_scripts/generation/process_hoa2_items.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+
+#
+#  (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+#  Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+#  Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+#  Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+#  contributors to this repository. All Rights Reserved.
+#
+#  This software is protected by copyright law and by international treaties.
+#  The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+#  Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+#  Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+#  Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+#  contributors to this repository retain full ownership rights in their respective contributions in
+#  the software. This notice grants no license of any kind, including but not limited to patent
+#  license, nor is any license granted by implication, estoppel or otherwise.
+#
+#  Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+#  contributions.
+#
+#  This software is provided "AS IS", without any express or implied warranties. The software is in the
+#  development stage. It is intended exclusively for experts who have experience with such software and
+#  solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+#  and fitness for a particular purpose are hereby disclaimed and excluded.
+#
+#  Any dispute, controversy or claim arising under or in relation to providing this software shall be
+#  submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+#  accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+#  the United Nations Convention on Contracts on the International Sales of Goods.
+#
+
+import logging
+import os
+from math import floor
+
+import numpy as np
+
+from ivas_processing_scripts.audiotools import audio, audiofile
+from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness
+from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_hoa2
+from ivas_processing_scripts.generation import config
+
+SEED_RANDOM_NOISE = 0
+
+
+# generator converting each row of a 2D array into a list of strings with 2 decimal digits
+def csv_formatdata(data):
+    for row in data:
+        yield ["%0.2f" % v for v in row]
+
+
+def generate_hoa2_items(
+    cfg: config.TestConfig,
+    logger: logging.Logger,
+):
+    """Generate HOA2 items from mono items based on scene description"""
+
+    # get the number of scenes
+    N_scenes = len(cfg.scenes)
+
+    # set the target loudness level (defaults to -26 when not specified in the configuration)
+    if "loudness" not in cfg.__dict__:
+        cfg.loudness = -26
+
+    # set the fs
+    if "fs" not in cfg.__dict__:
+        cfg.fs = 48000
+
+    # set the IR fs
+    if "IR_fs" not in cfg.__dict__:
+        cfg.IR_fs = 48000
+
+    # set the pre-amble and post-amble
+    if "preamble" not in cfg.__dict__:
+        cfg.preamble = 0.0
+
+    if "postamble" not in cfg.__dict__:
+        cfg.postamble = 0.0
+
+    # set the IR path (defaults to the "IR" folder next to this script)
+    if "IR_path" not in cfg.__dict__:
+        cfg.IR_path = os.path.join(os.path.dirname(__file__), "IR")
+
+    # set the flag for adding low-level random noise (disabled by default)
+    if "add_low_level_random_noise" not in cfg.__dict__:
+        cfg.add_low_level_random_noise = False
+
+    # repeat for all source files
+    for scene_name, scene in cfg.scenes.items():
+        logger.info(
+            f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene['name']}"
+        )
+
+        # extract the number of audio sources
+        N_sources = len(np.atleast_1d(scene["source"]))
+
+        # read the overlap length (a single value in seconds per scene; defaults to 0.0)
+        if "overlap" in scene.keys():
+            source_overlap = float(scene["overlap"])
+        else:
+            source_overlap = 0.0
+
+        y = audio.SceneBasedAudio("HOA2")
+        for i in range(N_sources):
+            # parse parameters from the scene description
+            source_file = np.atleast_1d(scene["source"])[i]
+            IR_file = np.atleast_1d(scene["IR"])[i]
+
+            logger.info(f"Convolving {source_file} with {IR_file}")
+
+            # read source file
+            x = audio.fromfile(
+                "MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs
+            )
+
+            # read the IR file
+            IR = audio.fromfile(
+                "HOA2", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs
+            )
+
+            # convolve with HOA2 IR
+            x = reverb_hoa2(x, IR)
+
+            # adjust the level of the HOA2 signal
+            _, scale_factor, _ = get_loudness(x, cfg.loudness, "BINAURAL")
+            x.audio *= scale_factor
+
+            # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap); NOTE(review): with overlap == 0.0 no shift is applied, i.e. all sources start at the same time - confirm this is intended
+            if i > 0 and source_overlap != 0.0:
+                # get the current length of the accumulated mix of all previous sources
+                N_delay = len(y.audio[:, 0])
+
+                # add the shift
+                N_delay += int(-source_overlap * x.fs)
+
+                # prepend zeros to delay the current source by N_delay samples
+                pre = np.zeros((N_delay, x.audio.shape[1]))
+                x.audio = np.concatenate([pre, x.audio])
+
+            # pad with zeros to ensure that the signal length is a multiple of 20ms
+            N_frame = x.fs / 50
+            if len(x.audio) % N_frame != 0:
+                N_pad = int(N_frame - len(x.audio) % N_frame)
+
+                # note: the padding zeros are prepended, i.e. inserted before the signal
+                pre = np.zeros((N_pad, x.audio.shape[1]))
+                x.audio = np.concatenate([pre, x.audio])
+
+            # add source signal to the array of source signals
+            y.fs = x.fs
+            if y.audio is None:
+                y.audio = x.audio
+            else:
+                # pad with zeros to have equal length of all source signals
+                if x.audio.shape[0] > y.audio.shape[0]:
+                    y.audio = np.vstack(
+                        (
+                            y.audio,
+                            np.zeros(
+                                (
+                                    x.audio.shape[0] - y.audio.shape[0],
+                                    y.audio.shape[1],
+                                )
+                            ),
+                        )
+                    )
+                elif y.audio.shape[0] > x.audio.shape[0]:
+                    x.audio = np.vstack(
+                        (
+                            x.audio,
+                            np.zeros(
+                                (
+                                    y.audio.shape[0] - x.audio.shape[0],
+                                    x.audio.shape[1],
+                                )
+                            ),
+                        )
+                    )
+
+                # superimpose
+                y.audio += x.audio
+
+        # append pre-amble and post-amble to all sources
+        if cfg.preamble != 0.0:
+            # ensure that pre-amble is a multiple of 20ms (rounded down)
+            N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)
+
+            # insert all-zero preamble to all sources
+            pre = np.zeros((N_pre, y.audio.shape[1]))
+            y.audio = np.concatenate([pre, y.audio])
+
+        if cfg.postamble != 0.0:
+            # ensure that post-amble is a multiple of 20ms (rounded down)
+            N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)
+
+            # append all-zero postamble to all sources
+            post = np.zeros((N_post, y.audio.shape[1]))
+            y.audio = np.concatenate([y.audio, post])
+
+        # add random noise
+        if cfg.add_low_level_random_noise:
+            # create uniformly distributed integer noise between -4 and 4 (seed is reset for every scene)
+            np.random.seed(SEED_RANDOM_NOISE)
+            noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype(
+                "float"
+            )
+
+            # superimpose
+            y.audio += noise
+
+        # write the reverberated audio into output file
+        output_filename = scene["name"]
+        audiofile.write(
+            os.path.join(cfg.output_path, output_filename), y.audio, y.fs
+        )  # !!!! TBD: replace all os.path.xxx operations with the Path object
+
+    return
-- 
GitLab


From 1a740cb3b4a301bb66afb0c1f0ad8617f4f9f6cc Mon Sep 17 00:00:00 2001
From: Simon Plain <simon.plain@dolby.com>
Date: Fri, 2 Jun 2023 09:11:01 +0200
Subject: [PATCH 2/2] Add __init__.py code and binauralize option

---
 item_gen_configs/HOA2_CONFIG.yml                |  3 +++
 item_gen_configs/P800-4.yml                     |  3 +++
 item_gen_configs/P800-5.yml                     |  3 +++
 ivas_processing_scripts/generation/__init__.py  |  4 ++++
 .../generation/process_foa_items.py             | 17 ++++++++++++++++-
 .../generation/process_hoa2_items.py            | 17 ++++++++++++++++-
 6 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/item_gen_configs/HOA2_CONFIG.yml b/item_gen_configs/HOA2_CONFIG.yml
index 7e41740e..3be0c57d 100644
--- a/item_gen_configs/HOA2_CONFIG.yml
+++ b/item_gen_configs/HOA2_CONFIG.yml
@@ -26,6 +26,9 @@ IR_path: "./IRs"
 ### Output path for generated test items and metadata files
 output_path: "./items_HOA2"
 
+### (Optional) Output path for binauralized versions of the generated HOA2 items
+# binaural_path: "./items_HOA2_bin"
+
 ### Target loudness in LKFS; default = null (no loudness normalization applied)
 loudness: -26
 
diff --git a/item_gen_configs/P800-4.yml b/item_gen_configs/P800-4.yml
index e137a438..253bb2c3 100644
--- a/item_gen_configs/P800-4.yml
+++ b/item_gen_configs/P800-4.yml
@@ -26,6 +26,9 @@ IR_path: "./IRs"
 ### Output path for generated test items and metadata files
 output_path: "./items_FOA"
 
+### (Optional) Output path for binauralized versions of the generated FOA items
+# binaural_path: "./items_FOA_bin"
+
 ### Target loudness in LKFS; default = null (no loudness normalization applied)
 loudness: -26
 
diff --git a/item_gen_configs/P800-5.yml b/item_gen_configs/P800-5.yml
index e137a438..253bb2c3 100644
--- a/item_gen_configs/P800-5.yml
+++ b/item_gen_configs/P800-5.yml
@@ -26,6 +26,9 @@ IR_path: "./IRs"
 ### Output path for generated test items and metadata files
 output_path: "./items_FOA"
 
+### (Optional) Output path for binauralized versions of the generated FOA items
+# binaural_path: "./items_FOA_bin"
+
 ### Target loudness in LKFS; default = null (no loudness normalization applied)
 loudness: -26
 
diff --git a/ivas_processing_scripts/generation/__init__.py b/ivas_processing_scripts/generation/__init__.py
index 59e396b9..bbd572a3 100755
--- a/ivas_processing_scripts/generation/__init__.py
+++ b/ivas_processing_scripts/generation/__init__.py
@@ -43,6 +43,7 @@ from ivas_processing_scripts.constants import (
 from ivas_processing_scripts.generation import (
     config,
     process_foa_items,
+    process_hoa2_items,
     process_ism_items,
     process_stereo_items,
 )
@@ -96,6 +97,9 @@ def main(args):
     elif cfg.format == "FOA":
         # generate FOA items according to scene description
         process_foa_items.generate_foa_items(cfg, logger)
+    elif cfg.format == "HOA2":
+        # generate HOA2 items according to scene description
+        process_hoa2_items.generate_hoa2_items(cfg, logger)
 
     # copy configuration to output directory
     with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f:
diff --git a/ivas_processing_scripts/generation/process_foa_items.py b/ivas_processing_scripts/generation/process_foa_items.py
index 8c06966f..31be77e4 100644
--- a/ivas_processing_scripts/generation/process_foa_items.py
+++ b/ivas_processing_scripts/generation/process_foa_items.py
@@ -36,7 +36,7 @@ from math import floor
 
 import numpy as np
 
-from ivas_processing_scripts.audiotools import audio, audiofile
+from ivas_processing_scripts.audiotools import audio, audiofile, convert
 from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness
 from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_foa
 from ivas_processing_scripts.generation import config
@@ -86,6 +86,10 @@ def generate_foa_items(
     if "add_low_level_random_noise" not in cfg.__dict__:
         cfg.add_low_level_random_noise = False
 
+    # setup binaural rendering
+    if "binaural_path" not in cfg.__dict__:
+        cfg.binaural_path = ""
+
     # repeat for all source files
     for scene_name, scene in cfg.scenes.items():
         logger.info(
@@ -213,4 +217,15 @@ def generate_foa_items(
             os.path.join(cfg.output_path, output_filename), y.audio, y.fs
         )  # !!!! TBD: replace all os.path.xxx operations with the Path object
 
+        # convert to binaural if option chosen
+        if cfg.binaural_path != "":
+            binaudio = audio.fromtype("BINAURAL")
+            binaudio.fs = y.fs
+            convert.format_conversion(y, binaudio)
+            audiofile.write(
+                os.path.join(cfg.binaural_path, output_filename),
+                binaudio.audio,
+                binaudio.fs,
+            )  # !!!! TBD: replace all os.path.xxx operations with the Path object
+
     return
diff --git a/ivas_processing_scripts/generation/process_hoa2_items.py b/ivas_processing_scripts/generation/process_hoa2_items.py
index 80bde6e8..b8a88adb 100644
--- a/ivas_processing_scripts/generation/process_hoa2_items.py
+++ b/ivas_processing_scripts/generation/process_hoa2_items.py
@@ -36,7 +36,7 @@ from math import floor
 
 import numpy as np
 
-from ivas_processing_scripts.audiotools import audio, audiofile
+from ivas_processing_scripts.audiotools import audio, audiofile, convert
 from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness
 from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_hoa2
 from ivas_processing_scripts.generation import config
@@ -86,6 +86,10 @@ def generate_hoa2_items(
     if "add_low_level_random_noise" not in cfg.__dict__:
         cfg.add_low_level_random_noise = False
 
+    # setup binaural rendering
+    if "binaural_path" not in cfg.__dict__:
+        cfg.binaural_path = ""
+
     # repeat for all source files
     for scene_name, scene in cfg.scenes.items():
         logger.info(
@@ -215,4 +219,15 @@ def generate_hoa2_items(
             os.path.join(cfg.output_path, output_filename), y.audio, y.fs
         )  # !!!! TBD: replace all os.path.xxx operations with the Path object
 
+        # convert to binaural if option chosen
+        if cfg.binaural_path != "":
+            binaudio = audio.fromtype("BINAURAL")
+            binaudio.fs = y.fs
+            convert.format_conversion(y, binaudio)
+            audiofile.write(
+                os.path.join(cfg.binaural_path, output_filename),
+                binaudio.audio,
+                binaudio.fs,
+            )  # !!!! TBD: replace all os.path.xxx operations with the Path object
+
     return
-- 
GitLab