diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d5a2b1b593e06413ff2540b3680af78de0e9cbbb..9fc552bf75518c5e66af4ab4128062b1e01a8888 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -37,9 +37,10 @@ stages: # NOTE: CODEC_DIR has to be in PATH - cd $CODEC_DIR # make sure that we are at latest main - # TODO: temporarily use the RC1a tag + # TODO: temporarily use the RC1b tag - git restore . - - git checkout 20230511-RC1a-listening-tests + - git fetch + - git checkout 20230516-RC1b-listening-tests - echo "--------------------------------------------" - echo "Building codec on commit $(git rev-parse HEAD --short)" - echo "--------------------------------------------" diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index 0922ae3814a794d34f93dcfc8338471de01652c3..5e62b3fbdcce47ca5225af94d2bf367292decd43 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -42,8 +42,10 @@ from ivas_processing_scripts.audiotools.constants import ( BINAURAL_AUDIO_FORMATS, CHANNEL_BASED_AUDIO_ALTNAMES, CHANNEL_BASED_AUDIO_FORMATS, + DEFAULT_ISM_METADATA, IVAS_FRAME_LEN_MS, METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, + NUMBER_COLUMNS_ISM_METADATA, OBJECT_BASED_AUDIO_FORMATS, SCENE_BASED_AUDIO_FORMATS, ) @@ -295,6 +297,7 @@ class ObjectBasedAudio(Audio): return obj def init_metadata(self): + # check if number of metadata files matches format if self.audio.shape[1] != len(self.metadata_files): raise ValueError( f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]" @@ -305,16 +308,19 @@ class ObjectBasedAudio(Audio): pos = np.genfromtxt(f, delimiter=",") # check if metadata has right number of columns - if pos.shape[1] < 5: - raise ValueError("Metadata incomplete. 
Columns are missing.") - elif pos.shape[1] > 5: - if pos.shape[1] <= 8: - # TODO: FIXME - pos = pos[:, :5] - else: - raise ValueError( - "Too many columns in metadata (possibly old version with frame index used)" - ) + num_columns = pos.shape[1] + if num_columns < 2: + raise ValueError( + "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory." + ) + elif num_columns > NUMBER_COLUMNS_ISM_METADATA: + raise ValueError("Too many columns in metadata") + + # pad metadata to max number of columns + if num_columns < NUMBER_COLUMNS_ISM_METADATA: + pos = np.hstack( + [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])] + ) # check if metadata is longer than file -> cut off num_frames = int( diff --git a/ivas_processing_scripts/audiotools/constants.py b/ivas_processing_scripts/audiotools/constants.py index f9f020c80ba399d85dcf4ee7963641716a410c23..a77dcce756a856a44838aadd709b47628b83a947 100755 --- a/ivas_processing_scripts/audiotools/constants.py +++ b/ivas_processing_scripts/audiotools/constants.py @@ -703,3 +703,16 @@ DELAY_COMPENSATION_FOR_FILTERING = { "HP50_32KHZ": 559, "HP50_48KHZ": 839, } + +DEFAULT_ISM_METADATA = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0] +FORMAT_ISM_METADATA_CSV = [ + "%+07.2f", + "%+06.2f", + "%05.2f", + "%06.2f", + "%04.2f", + "%+07.2f", + "%+06.2f", + "%1.0f", +] +NUMBER_COLUMNS_ISM_METADATA = len(DEFAULT_ISM_METADATA) diff --git a/ivas_processing_scripts/audiotools/metadata.py b/ivas_processing_scripts/audiotools/metadata.py index 436dc6336957b5f086a2d9d7c7a5e0420d4cb77a..af87fe3feda6464166e537dc61f6e9e255d8126d 100755 --- a/ivas_processing_scripts/audiotools/metadata.py +++ b/ivas_processing_scripts/audiotools/metadata.py @@ -40,7 +40,12 @@ from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audio import fromtype from ivas_processing_scripts.audiotools.audioarray import trim from ivas_processing_scripts.audiotools.audiofile import read -from 
ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS +from ivas_processing_scripts.audiotools.constants import ( + DEFAULT_ISM_METADATA, + FORMAT_ISM_METADATA_CSV, + IVAS_FRAME_LEN_MS, + NUMBER_COLUMNS_ISM_METADATA, +) class Metadata: @@ -201,16 +206,15 @@ def write_ISM_metadata_in_file( for i, csv_file in enumerate(file_names): number_frames = metadata[i].shape[0] + number_columns = metadata[i].shape[1] with open(csv_file, "w", newline="") as file: writer = csv.writer(file) for k in range(number_frames): - row_list = [ - "%+07.2f" % np.round(metadata[i][k, 0], 2), - "%+06.2f" % np.round(metadata[i][k, 1], 2), - "01.00", - "000.00", - "1.00", - ] + row_list = [] + for p in range(number_columns): + row_list.append( + FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2) + ) writer.writerow(row_list) return file_names @@ -374,7 +378,7 @@ def concat_meta_from_file( # add preamble if preamble: - concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble) + concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble, 0) write_ISM_metadata_in_file(concat_meta_all_obj, out_file) @@ -544,8 +548,10 @@ def metadata_search( def add_remove_preamble( metadata, preamble, + postamble, add: Optional[bool] = True, ): + # preamble preamble_frames = preamble / IVAS_FRAME_LEN_MS if not preamble_frames.is_integer(): raise ValueError( @@ -555,18 +561,49 @@ def add_remove_preamble( for obj_idx in range(len(metadata)): if metadata is not None and metadata[obj_idx] is not None: if add: + num_columns = metadata[obj_idx].shape[1] + metadata[obj_idx] = np.vstack( + [ + np.repeat( + np.array(DEFAULT_ISM_METADATA)[None, :num_columns], + int(preamble_frames), + 0, + ), + metadata[obj_idx], + ] + ) + else: metadata[obj_idx] = trim( metadata[obj_idx], - limits=(-int(preamble_frames), 0), + limits=(int(preamble_frames), 0), samples=True, ) - # add radius 1 - metadata[obj_idx][: int(preamble_frames), 2] = 1 + # postamble + postamble_frames = postamble / 
IVAS_FRAME_LEN_MS + if not postamble_frames.is_integer(): + raise ValueError( + f"Application of postamble for ISM metadata is only possible if postamble length is multiple of frame length. " + f"Frame length: {IVAS_FRAME_LEN_MS}ms" + ) + for obj_idx in range(len(metadata)): + if metadata is not None and metadata[obj_idx] is not None: + if add: + num_columns = metadata[obj_idx].shape[1] + metadata[obj_idx] = np.vstack( + [ + metadata[obj_idx], + np.repeat( + np.array(DEFAULT_ISM_METADATA)[None, :num_columns], + int(postamble_frames), + 0, + ), + ] + ) else: metadata[obj_idx] = trim( metadata[obj_idx], - limits=(int(preamble_frames), 0), + limits=(0, int(postamble_frames)), samples=True, ) diff --git a/ivas_processing_scripts/processing/preprocessing_2.py b/ivas_processing_scripts/processing/preprocessing_2.py index b894dd963d72b4679518ad0d76bf27538138b9a5..83dc0097865b5f3a423051c250706bd853d06e89 100644 --- a/ivas_processing_scripts/processing/preprocessing_2.py +++ b/ivas_processing_scripts/processing/preprocessing_2.py @@ -64,8 +64,7 @@ class Preprocessing2(Processing): self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta ) - # add preamble - # also apply preamble to ISM metadata + # modify ISM metadata if self.in_fmt.startswith("ISM"): if not self.preamble: preamble = 0 @@ -75,16 +74,24 @@ class Preprocessing2(Processing): # read out old metadata = audio_object.object_pos - # modify metadata - metadata = add_remove_preamble(metadata, preamble) + # add preamble + metadata = add_remove_preamble(metadata, preamble, 0) + + # repeat signal if self.repeat_signal: metadata = [np.concatenate((m, m), axis=0) for m in metadata] + + # add postamble + if self.postamble: + metadata = add_remove_preamble(metadata, 0, self.postamble) + meta_files = write_ISM_metadata_in_file(metadata, [out_file], True) # modify audio object audio_object.metadata_files = meta_files - audio_object.obect_pos = metadata + audio_object.object_pos = metadata + # modify audio signal # add preamble if 
self.preamble > 0: logger.debug(f"Add preamble of length {self.preamble}ms") @@ -111,7 +118,7 @@ class Preprocessing2(Processing): (audio_object.audio, audio_object.audio), axis=0 ) - # add postamble - do ater signal repetition as this is just for ensuring equal lengths between in- and output signals + # add postamble - do after signal repetition as this is just for ensuring equal lengths between in- and output signals if self.postamble > 0: logger.debug(f"Add postamble of length {self.postamble}ms") audio_object.audio = trim( diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 29797295cc205b6fb5b2ad726211fba53a3d693e..7167428350fcf47fac8a8a41c3e9841c8748b5ed 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -407,28 +407,36 @@ def remove_pre_and_postamble( ): # remove preamble for ISM metadata if out_fmt.startswith("ISM"): + # remove postamble + if postamble_len_ms: + meta = add_remove_preamble(meta, 0, postamble_len_ms, add=False) + # cut first half of the metadata if repeat_signal: meta = [m[int(len(m) / 2) :, :] for m in meta] # remove preamble if preamble_len_ms > 0: - meta = add_remove_preamble(meta, preamble_len_ms, add=False) + meta = add_remove_preamble(meta, preamble_len_ms, 0, add=False) + + # remove postamble + if postamble_len_ms: + if logger: + logger.debug("Remove postamble") + postamble_len_samples = (postamble_len_ms * fs) // 1000 + x = trim(x, fs, (0, postamble_len_samples), samples=True) - # get number of samples to cut from start - trim_len_samples = (preamble_len_ms * fs) // 1000 - postamble_len_samples = (postamble_len_ms * fs) // 1000 + # cut first half of signal if repeat_signal: if logger: logger.debug("Remove first half of signal") + x = x[len(x) // 2 :, :] - # need to subtract the postamble length before getting half of signal length - it was added after concatenation - trim_len_samples += (len(x) - 
postamble_len_samples) // 2 - - if trim_len_samples > 0 and logger: - logger.debug("Remove preamble") - - x = trim(x, fs, (trim_len_samples, postamble_len_samples), samples=True) + # remove preamble + if preamble_len_ms: + if logger: + logger.debug("Remove preamble") + x = trim(x, fs, ((preamble_len_ms * fs) // 1000, 0), samples=True) return x, meta @@ -464,4 +472,3 @@ def preprocess_background_noise(cfg): ] = output_audio return - diff --git a/ivas_processing_scripts/processing/processing_splitting_scaling.py b/ivas_processing_scripts/processing/processing_splitting_scaling.py index 86b893b26ee88aae7cbe9f88ced87fbbde6eca9a..69a77e61857c6ada4f2df704bc9388d51e9b0911 100644 --- a/ivas_processing_scripts/processing/processing_splitting_scaling.py +++ b/ivas_processing_scripts/processing/processing_splitting_scaling.py @@ -240,17 +240,6 @@ class Processing_splitting_scaling(Processing): out_meta = repeat(None) else: - # check length of output signals - # input_aligned_file = ( - # in_file.parent.parent - # / f"{Path(in_file.stem).stem}.wav" - # ) - # input_aligned_array, _ = read(input_aligned_file) - # if (len_inp := len(input_aligned_array)) != (len_out := len(x)): - # warn( - # f"For file {out_file} the length is {len_out} and does not match the (frame aligned) input length {len_inp}." - # ) - # set output values out_files = [out_file] file_splits = [x]