From ff88325e958feead62c7d2d6e42bd3215a55e410 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Fri, 9 Jun 2023 15:24:54 +0200 Subject: [PATCH 1/6] adapted ISM metadata to 8 columns and enabled pass through to encoder --- ivas_processing_scripts/audiotools/audio.py | 24 +++++++++++-------- .../audiotools/constants.py | 4 ++++ .../audiotools/metadata.py | 12 ++++------ 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index 0922ae38..3fe1db04 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -46,6 +46,8 @@ from ivas_processing_scripts.audiotools.constants import ( METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, OBJECT_BASED_AUDIO_FORMATS, SCENE_BASED_AUDIO_FORMATS, + NUMBER_COLUMNS_ISM_METADATA, + DEFAULT_ISM_METADATA ) from .EFAP import wrap_angles @@ -295,6 +297,7 @@ class ObjectBasedAudio(Audio): return obj def init_metadata(self): + # check if number of metadata files matches format if self.audio.shape[1] != len(self.metadata_files): raise ValueError( f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]" @@ -305,16 +308,17 @@ class ObjectBasedAudio(Audio): pos = np.genfromtxt(f, delimiter=",") # check if metadata has right number of columns - if pos.shape[1] < 5: - raise ValueError("Metadata incomplete. Columns are missing.") - elif pos.shape[1] > 5: - if pos.shape[1] <= 8: - # TODO: FIXME - pos = pos[:, :5] - else: - raise ValueError( - "Too many columns in metadata (possibly old version with frame index used)" - ) + num_columns = pos.shape[1] + if num_columns < 2: + raise ValueError("Metadata incomplete. Columns are missing. 
Azimuth and elevation are mandatory.") + elif num_columns > NUMBER_COLUMNS_ISM_METADATA: + raise ValueError( + "Too many columns in metadata" + ) + + # pad metadata to max number of columns + if num_columns < NUMBER_COLUMNS_ISM_METADATA: + pos = np.hstack([pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])]) # check if metadata is longer than file -> cut off num_frames = int( diff --git a/ivas_processing_scripts/audiotools/constants.py b/ivas_processing_scripts/audiotools/constants.py index f9f020c8..d84b938a 100755 --- a/ivas_processing_scripts/audiotools/constants.py +++ b/ivas_processing_scripts/audiotools/constants.py @@ -703,3 +703,7 @@ DELAY_COMPENSATION_FOR_FILTERING = { "HP50_32KHZ": 559, "HP50_48KHZ": 839, } + +DEFAULT_ISM_METADATA = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0] +FORMAT_ISM_METADATA_CSV = ["%+07.2f", "%+06.2f", "%05.2f", "%06.2f", "%04.2f", "%+07.2f", "%+06.2f", "%1.0f"] +NUMBER_COLUMNS_ISM_METADATA = len(DEFAULT_ISM_METADATA) diff --git a/ivas_processing_scripts/audiotools/metadata.py b/ivas_processing_scripts/audiotools/metadata.py index 436dc633..adbf40eb 100755 --- a/ivas_processing_scripts/audiotools/metadata.py +++ b/ivas_processing_scripts/audiotools/metadata.py @@ -40,7 +40,7 @@ from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audio import fromtype from ivas_processing_scripts.audiotools.audioarray import trim from ivas_processing_scripts.audiotools.audiofile import read -from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS +from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS, FORMAT_ISM_METADATA_CSV, NUMBER_COLUMNS_ISM_METADATA class Metadata: @@ -204,13 +204,9 @@ def write_ISM_metadata_in_file( with open(csv_file, "w", newline="") as file: writer = csv.writer(file) for k in range(number_frames): - row_list = [ - "%+07.2f" % np.round(metadata[i][k, 0], 2), - "%+06.2f" % np.round(metadata[i][k, 1], 2), - "01.00", - "000.00", - 
"1.00", - ] + row_list = [] + for p in range(NUMBER_COLUMNS_ISM_METADATA): + row_list.append(FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2)) writer.writerow(row_list) return file_names -- GitLab From b0049e193bb9aedb9df00d8cc939b5ab29c693c8 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Fri, 9 Jun 2023 17:49:52 +0200 Subject: [PATCH 2/6] fixed ISM metadata preamble --- ivas_processing_scripts/audiotools/metadata.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/ivas_processing_scripts/audiotools/metadata.py b/ivas_processing_scripts/audiotools/metadata.py index adbf40eb..0b08c173 100755 --- a/ivas_processing_scripts/audiotools/metadata.py +++ b/ivas_processing_scripts/audiotools/metadata.py @@ -40,7 +40,7 @@ from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audio import fromtype from ivas_processing_scripts.audiotools.audioarray import trim from ivas_processing_scripts.audiotools.audiofile import read -from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS, FORMAT_ISM_METADATA_CSV, NUMBER_COLUMNS_ISM_METADATA +from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS, FORMAT_ISM_METADATA_CSV, NUMBER_COLUMNS_ISM_METADATA, DEFAULT_ISM_METADATA class Metadata: @@ -551,14 +551,10 @@ def add_remove_preamble( for obj_idx in range(len(metadata)): if metadata is not None and metadata[obj_idx] is not None: if add: - metadata[obj_idx] = trim( - metadata[obj_idx], - limits=(-int(preamble_frames), 0), - samples=True, + num_columns = metadata[obj_idx].shape[1] + metadata[obj_idx] = np.vstack( + [np.repeat(np.array(DEFAULT_ISM_METADATA)[None, :num_columns], preamble_frames, 0), metadata[obj_idx]] ) - - # add radius 1 - metadata[obj_idx][: int(preamble_frames), 2] = 1 else: metadata[obj_idx] = trim( metadata[obj_idx], -- GitLab From 6611cf9dfd0893f6692f28196ccd798a6f3dccb2 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Mon, 12 Jun 2023 15:10:57 +0200 Subject: 
[PATCH 3/6] fixed small issue and added postamble for ism metadata --- ivas_processing_scripts/audiotools/audio.py | 14 +++-- .../audiotools/constants.py | 11 +++- .../audiotools/metadata.py | 55 +++++++++++++++++-- .../processing/preprocessing_2.py | 19 +++++-- .../processing/processing.py | 31 +++++++---- .../processing_splitting_scaling.py | 11 ---- 6 files changed, 100 insertions(+), 41 deletions(-) diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index 3fe1db04..5e62b3fb 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -42,12 +42,12 @@ from ivas_processing_scripts.audiotools.constants import ( BINAURAL_AUDIO_FORMATS, CHANNEL_BASED_AUDIO_ALTNAMES, CHANNEL_BASED_AUDIO_FORMATS, + DEFAULT_ISM_METADATA, IVAS_FRAME_LEN_MS, METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, + NUMBER_COLUMNS_ISM_METADATA, OBJECT_BASED_AUDIO_FORMATS, SCENE_BASED_AUDIO_FORMATS, - NUMBER_COLUMNS_ISM_METADATA, - DEFAULT_ISM_METADATA ) from .EFAP import wrap_angles @@ -310,15 +310,17 @@ class ObjectBasedAudio(Audio): # check if metadata has right number of columns num_columns = pos.shape[1] if num_columns < 2: - raise ValueError("Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory.") - elif num_columns > NUMBER_COLUMNS_ISM_METADATA: raise ValueError( - "Too many columns in metadata" + "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory." 
) + elif num_columns > NUMBER_COLUMNS_ISM_METADATA: + raise ValueError("Too many columns in metadata") # pad metadata to max number of columns if num_columns < NUMBER_COLUMNS_ISM_METADATA: - pos = np.hstack([pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])]) + pos = np.hstack( + [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])] + ) # check if metadata is longer than file -> cut off num_frames = int( diff --git a/ivas_processing_scripts/audiotools/constants.py b/ivas_processing_scripts/audiotools/constants.py index d84b938a..a77dcce7 100755 --- a/ivas_processing_scripts/audiotools/constants.py +++ b/ivas_processing_scripts/audiotools/constants.py @@ -705,5 +705,14 @@ DELAY_COMPENSATION_FOR_FILTERING = { } DEFAULT_ISM_METADATA = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0] -FORMAT_ISM_METADATA_CSV = ["%+07.2f", "%+06.2f", "%05.2f", "%06.2f", "%04.2f", "%+07.2f", "%+06.2f", "%1.0f"] +FORMAT_ISM_METADATA_CSV = [ + "%+07.2f", + "%+06.2f", + "%05.2f", + "%06.2f", + "%04.2f", + "%+07.2f", + "%+06.2f", + "%1.0f", +] NUMBER_COLUMNS_ISM_METADATA = len(DEFAULT_ISM_METADATA) diff --git a/ivas_processing_scripts/audiotools/metadata.py b/ivas_processing_scripts/audiotools/metadata.py index 0b08c173..af87fe3f 100755 --- a/ivas_processing_scripts/audiotools/metadata.py +++ b/ivas_processing_scripts/audiotools/metadata.py @@ -40,7 +40,12 @@ from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audio import fromtype from ivas_processing_scripts.audiotools.audioarray import trim from ivas_processing_scripts.audiotools.audiofile import read -from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS, FORMAT_ISM_METADATA_CSV, NUMBER_COLUMNS_ISM_METADATA, DEFAULT_ISM_METADATA +from ivas_processing_scripts.audiotools.constants import ( + DEFAULT_ISM_METADATA, + FORMAT_ISM_METADATA_CSV, + IVAS_FRAME_LEN_MS, + NUMBER_COLUMNS_ISM_METADATA, +) class Metadata: @@ -201,12 +206,15 @@ def 
write_ISM_metadata_in_file( for i, csv_file in enumerate(file_names): number_frames = metadata[i].shape[0] + number_columns = metadata[i].shape[1] with open(csv_file, "w", newline="") as file: writer = csv.writer(file) for k in range(number_frames): row_list = [] - for p in range(NUMBER_COLUMNS_ISM_METADATA): - row_list.append(FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2)) + for p in range(number_columns): + row_list.append( + FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2) + ) writer.writerow(row_list) return file_names @@ -370,7 +378,7 @@ def concat_meta_from_file( # add preamble if preamble: - concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble) + concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble, 0) write_ISM_metadata_in_file(concat_meta_all_obj, out_file) @@ -540,8 +548,10 @@ def metadata_search( def add_remove_preamble( metadata, preamble, + postamble, add: Optional[bool] = True, ): + # preamble preamble_frames = preamble / IVAS_FRAME_LEN_MS if not preamble_frames.is_integer(): raise ValueError( @@ -553,7 +563,14 @@ def add_remove_preamble( if add: num_columns = metadata[obj_idx].shape[1] metadata[obj_idx] = np.vstack( - [np.repeat(np.array(DEFAULT_ISM_METADATA)[None, :num_columns], preamble_frames, 0), metadata[obj_idx]] + [ + np.repeat( + np.array(DEFAULT_ISM_METADATA)[None, :num_columns], + preamble_frames, + 0, + ), + metadata[obj_idx], + ] ) else: metadata[obj_idx] = trim( @@ -562,4 +579,32 @@ def add_remove_preamble( samples=True, ) + # postamble + postamble_frames = postamble / IVAS_FRAME_LEN_MS + if not postamble_frames.is_integer(): + raise ValueError( + f"Application of postamble for ISM metadata is only possible if postamble length is multiple of frame length. 
" + f"Frame length: {IVAS_FRAME_LEN_MS}ms" + ) + for obj_idx in range(len(metadata)): + if metadata is not None and metadata[obj_idx] is not None: + if add: + num_columns = metadata[obj_idx].shape[1] + metadata[obj_idx] = np.vstack( + [ + metadata[obj_idx], + np.repeat( + np.array(DEFAULT_ISM_METADATA)[None, :num_columns], + postamble_frames, + 0, + ), + ] + ) + else: + metadata[obj_idx] = trim( + metadata[obj_idx], + limits=(0, int(postamble_frames)), + samples=True, + ) + return metadata diff --git a/ivas_processing_scripts/processing/preprocessing_2.py b/ivas_processing_scripts/processing/preprocessing_2.py index b894dd96..83dc0097 100644 --- a/ivas_processing_scripts/processing/preprocessing_2.py +++ b/ivas_processing_scripts/processing/preprocessing_2.py @@ -64,8 +64,7 @@ class Preprocessing2(Processing): self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta ) - # add preamble - # also apply preamble to ISM metadata + # modify ISM metadata if self.in_fmt.startswith("ISM"): if not self.preamble: preamble = 0 @@ -75,16 +74,24 @@ class Preprocessing2(Processing): # read out old metadata = audio_object.object_pos - # modify metadata - metadata = add_remove_preamble(metadata, preamble) + # add preamble + metadata = add_remove_preamble(metadata, preamble, 0) + + # repeat signal if self.repeat_signal: metadata = [np.concatenate((m, m), axis=0) for m in metadata] + + # add postamble + if self.postamble: + metadata = add_remove_preamble(metadata, 0, self.postamble) + meta_files = write_ISM_metadata_in_file(metadata, [out_file], True) # modify audio object audio_object.metadata_files = meta_files - audio_object.obect_pos = metadata + audio_object.object_pos = metadata + # modify audio signal # add preamble if self.preamble > 0: logger.debug(f"Add preamble of length {self.preamble}ms") @@ -111,7 +118,7 @@ class Preprocessing2(Processing): (audio_object.audio, audio_object.audio), axis=0 ) - # add postamble - do ater signal repetition as this is just for ensuring equal 
lengths between in- and output signals + # add postamble - do after signal repetition as this is just for ensuring equal lengths between in- and output signals if self.postamble > 0: logger.debug(f"Add postamble of length {self.postamble}ms") audio_object.audio = trim( diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 29797295..71674283 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -407,28 +407,36 @@ def remove_pre_and_postamble( ): # remove preamble for ISM metadata if out_fmt.startswith("ISM"): + # remove postamble + if postamble_len_ms: + meta = add_remove_preamble(meta, 0, postamble_len_ms, add=False) + # cut first half of the metadata if repeat_signal: meta = [m[int(len(m) / 2) :, :] for m in meta] # remove preamble if preamble_len_ms > 0: - meta = add_remove_preamble(meta, preamble_len_ms, add=False) + meta = add_remove_preamble(meta, preamble_len_ms, 0, add=False) + + # remove postamble + if postamble_len_ms: + if logger: + logger.debug("Remove postamble") + postamble_len_samples = (postamble_len_ms * fs) // 1000 + x = trim(x, fs, (0, postamble_len_samples), samples=True) - # get number of samples to cut from start - trim_len_samples = (preamble_len_ms * fs) // 1000 - postamble_len_samples = (postamble_len_ms * fs) // 1000 + # cut first half of signal if repeat_signal: if logger: logger.debug("Remove first half of signal") + x = x[len(x) // 2 :, :] - # need to subtract the postamble length before getting half of signal length - it was added after concatenation - trim_len_samples += (len(x) - postamble_len_samples) // 2 - - if trim_len_samples > 0 and logger: - logger.debug("Remove preamble") - - x = trim(x, fs, (trim_len_samples, postamble_len_samples), samples=True) + # remove preamble + if preamble_len_ms: + if logger: + logger.debug("Remove preamble") + x = trim(x, fs, ((preamble_len_ms * fs) // 1000, 0), 
samples=True) return x, meta @@ -464,4 +472,3 @@ def preprocess_background_noise(cfg): ] = output_audio return - diff --git a/ivas_processing_scripts/processing/processing_splitting_scaling.py b/ivas_processing_scripts/processing/processing_splitting_scaling.py index 86b893b2..69a77e61 100644 --- a/ivas_processing_scripts/processing/processing_splitting_scaling.py +++ b/ivas_processing_scripts/processing/processing_splitting_scaling.py @@ -240,17 +240,6 @@ class Processing_splitting_scaling(Processing): out_meta = repeat(None) else: - # check length of output signals - # input_aligned_file = ( - # in_file.parent.parent - # / f"{Path(in_file.stem).stem}.wav" - # ) - # input_aligned_array, _ = read(input_aligned_file) - # if (len_inp := len(input_aligned_array)) != (len_out := len(x)): - # warn( - # f"For file {out_file} the length is {len_out} and does not match the (frame aligned) input length {len_inp}." - # ) - # set output values out_files = [out_file] file_splits = [x] -- GitLab From 5ac7e5331fae78e86fb56046a7b47b31426965aa Mon Sep 17 00:00:00 2001 From: Treffehn Date: Tue, 13 Jun 2023 10:48:43 +0200 Subject: [PATCH 4/6] changed codec tag to RC1b --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d5a2b1b5..881b070e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -39,7 +39,7 @@ stages: # make sure that we are at latest main # TODO: temporarily use the RC1a tag - git restore . 
- - git checkout 20230511-RC1a-listening-tests + - git checkout 20230511-RC1b-listening-tests - echo "--------------------------------------------" - echo "Building codec on commit $(git rev-parse HEAD --short)" - echo "--------------------------------------------" -- GitLab From caae8ffc9a2089f58d1251d34245614b162aa3d9 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Tue, 13 Jun 2023 10:51:18 +0200 Subject: [PATCH 5/6] changed tag name --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 881b070e..49d13ed9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -39,7 +39,7 @@ stages: # make sure that we are at latest main # TODO: temporarily use the RC1a tag - git restore . - - git checkout 20230511-RC1b-listening-tests + - git checkout 20230516-RC1b-listening-tests - echo "--------------------------------------------" - echo "Building codec on commit $(git rev-parse HEAD --short)" - echo "--------------------------------------------" -- GitLab From 7d1faf1d908f7b692fad3950734b78c9cea137c8 Mon Sep 17 00:00:00 2001 From: knj Date: Tue, 13 Jun 2023 11:01:22 +0200 Subject: [PATCH 6/6] do fetch before checkout --- .gitlab-ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 49d13ed9..9fc552bf 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -37,8 +37,9 @@ stages: # NOTE: CODEC_DIR has to be in PATH - cd $CODEC_DIR # make sure that we are at latest main - # TODO: temporarily use the RC1a tag + # TODO: temporarily use the RC1b tag - git restore . + - git fetch - git checkout 20230516-RC1b-listening-tests - echo "--------------------------------------------" - echo "Building codec on commit $(git rev-parse HEAD --short)" -- GitLab