diff --git a/ivas_processing_scripts/generation/generate_ismN_items.py b/ivas_processing_scripts/generation/generate_ismN_items.py
index 3c474309e24763e35b6c13c906448cb68bc565b0..948c6d48c4ee7f7ce0649c09d10507d6d8edb362 100644
--- a/ivas_processing_scripts/generation/generate_ismN_items.py
+++ b/ivas_processing_scripts/generation/generate_ismN_items.py
@@ -446,7 +446,9 @@ def generate_ismN_scene(
             y.object_pos.extend(x.object_pos)
 
         # add ISM metadata .csv filename (should end with .wav.0.csv, .wav.1.csv, ...)
-        y.metadata_files.insert(i, str(output_filename.with_suffix(f".{i}.csv")))
+        y.metadata_files.insert(
+            i, str(output_filename.with_suffix(output_filename.suffix + f".{i}.csv"))
+        )
 
     # append pre-amble and post-amble
     if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
diff --git a/ivas_processing_scripts/generation/generate_masa_items.py b/ivas_processing_scripts/generation/generate_masa_items.py
index 6ddac8707f566cef93ee5542662ea7b9e740420b..faba91122c383089673146dff09d07a7bef2e8ff 100644
--- a/ivas_processing_scripts/generation/generate_masa_items.py
+++ b/ivas_processing_scripts/generation/generate_masa_items.py
@@ -261,7 +261,7 @@ def generate_MASA_scene(
                 sys.exit(-1)
 
             # calculate absolute shift of the source signal in seconds
-            source_shift = end_position[overlap_ref] - overlap
+            source_shift = end_position[overlap_ref] + overlap
         else:
             source_shift = 0.0
 
@@ -525,7 +525,7 @@ def generate_MASA_scene(
         y_int.audio = audioarray.window(y_int.audio, y_int.fs, cfg.fade_in_out * 1000)
 
     # generate MASA metadata filename (should end with .met)
-    y.metadata_file = output_filename.with_suffix(".met")
+    y.metadata_file = output_filename.with_suffix(output_filename.suffix + ".met")
 
     # convert the intermediate SBA output signal to MASA format
     render_sba_to_masa(y_int, y)
diff --git a/ivas_processing_scripts/generation/generate_omasa_items.py b/ivas_processing_scripts/generation/generate_omasa_items.py
index 0881c7ca200e5cf639d23bfd9a1f0edd1b205b9e..b8aba2b6105a9675a4bd778ba6b872c7fc93b571 100644
--- a/ivas_processing_scripts/generation/generate_omasa_items.py
+++ b/ivas_processing_scripts/generation/generate_omasa_items.py
@@ -474,7 +474,12 @@ def generate_OMASA_scene(
 
             # add ISM metadata .csv filename (should end with .wav.0.csv, .wav.1.csv, ...)
             y_int.metadata_files.insert(
-                i - 1, str(output_filename.with_suffix(f".{i - 1}.csv"))
+                i - 1,
+                str(
+                    output_filename.with_suffix(
+                        output_filename.suffix + f".{i - 1}.csv"
+                    )
+                ),
             )
 
         # append pre-amble and post-amble
@@ -520,7 +525,9 @@ def generate_OMASA_scene(
         y_int.audio = audioarray.window(y_int.audio, y_int.fs, cfg.fade_in_out * 1000)
 
     # generate and insert MASA metadata filename (should end with .met)
-    y.metadata_files.append(str(output_filename.with_suffix(".met")))
+    y.metadata_files.append(
+        str(output_filename.with_suffix(output_filename.suffix + ".met"))
+    )
 
     # convert the intermediate OSBA object to OMASA object
     convert_osba(y_int, y)
diff --git a/ivas_processing_scripts/generation/generate_osba_items.py b/ivas_processing_scripts/generation/generate_osba_items.py
index 8d2ca0d85209e6e7e86f24b19daeb8fb6bb56465..8190f5ca32425c96f8af35aae5c070d46f5f996b 100644
--- a/ivas_processing_scripts/generation/generate_osba_items.py
+++ b/ivas_processing_scripts/generation/generate_osba_items.py
@@ -460,7 +460,12 @@ def generate_OSBA_scene(
 
            # add ISM metadata .csv filename (should end with .wav.0.csv, .wav.1.csv, ...)
             y.metadata_files.insert(
-                i - 1, str(output_filename.with_suffix(f".{i - 1}.csv"))
+                i - 1,
+                str(
+                    output_filename.with_suffix(
+                        output_filename.suffix + f".{i - 1}.csv"
+                    )
+                ),
             )
 
         # append pre-amble and post-amble
diff --git a/ivas_processing_scripts/generation/generate_sba_items.py b/ivas_processing_scripts/generation/generate_sba_items.py
index 631d6165481f1cc09ccfe56335b3af86ec16e4b4..22aba8ab77bc2ca71a4249d6bc1e77b30e9820fd 100644
--- a/ivas_processing_scripts/generation/generate_sba_items.py
+++ b/ivas_processing_scripts/generation/generate_sba_items.py
@@ -39,6 +39,7 @@ from pathlib import Path
 import numpy as np
 
 from ivas_processing_scripts.audiotools import audio, audioarray, audiofile
+from ivas_processing_scripts.audiotools.convert.objectbased import convert_objectbased
 from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased
 from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm
 from ivas_processing_scripts.audiotools.wrappers.reverb import (
@@ -208,13 +209,29 @@ def generate_sba_scene(
         source_file = (
             scene["input"][i] if isinstance(scene["input"], list) else scene["input"]
         )
-        IR_file = scene["IR"][i] if isinstance(scene["IR"], list) else scene["IR"]
 
-        # get input filename and IR filename
         input_filename = Path(source_file).parent / (
             cfg.use_input_prefix + Path(source_file).name
         )
-        IR_filename = Path(IR_file).parent / (cfg.use_IR_prefix + Path(IR_file).name)
+
+        # get input filename and IR filename
+        if "IR" in scene.keys():
+            IR_file = scene["IR"][i] if isinstance(scene["IR"], list) else scene["IR"]
+            IR_filename = Path(IR_file).parent / (
+                cfg.use_IR_prefix + Path(IR_file).name
+            )
+        else:
+            # read azimuth and elevation information
+            source_azi = (
+                scene["azimuth"][i]
+                if isinstance(scene["azimuth"], list)
+                else scene["azimuth"]
+            )
+            source_ele = (
+                scene["elevation"][i]
+                if isinstance(scene["elevation"], list)
+                else scene["elevation"]
+            )
 
         # read the source shift length (in seconds)
         if "shift" in scene.keys():
@@ -282,9 +299,14 @@ def generate_sba_scene(
         else:
             level = -26
 
-        logger.info(
-            f"-- Convolving {source_file} with {IR_file} at {level} LKFS with shift of {source_shift_in_seconds} seconds"
-        )
+        if "IR" in scene.keys():
+            logger.info(
+                f"-- Convolving {source_file} with {IR_file} at {level} LKFS with shift of {source_shift_in_seconds} seconds"
+            )
+        else:
+            logger.info(
+                f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LKFS with shift of {source_shift_in_seconds} seconds"
+            )
 
         # read source file
         x = audio.fromfile("MONO", input_filename)
@@ -301,17 +323,6 @@ def generate_sba_scene(
             x.audio = resampled_audio
             x.fs = cfg.fs
 
-        # read the IR file (!must be in target format!)
-        IR = audio.fromfile(cfg.format, IR_filename)
-
-        # convolve MONO source audio with FOA/HOA2/HOA3 IR -> results in FOA/HOA2/HOA3 audio object
-        if cfg.format == "FOA":
-            x = reverb_foa(x, IR, mode=None)
-        elif cfg.format == "HOA2":
-            x = reverb_hoa2(x, IR, mode=None)
-        elif cfg.format == "HOA3":
-            x = reverb_hoa3(x, IR, mode=None)
-
         # adjust the level of the FOA/HOA2/HOA3 signal
         if level is None:
             # do not change the level of the audio source signal
@@ -331,6 +342,100 @@ def generate_sba_scene(
                 x.audio, x.fs, limits=[0, -N_pad], samples=True
             )
 
+        # get the number of frames (multiple of 20ms)
+        N_frames = int(len(x.audio) / frame_len)
+
+        if "IR" in scene.keys():
+            # read the IR file (!must be in target format!)
+            IR = audio.fromfile(cfg.format, IR_filename)
+
+            # convolve MONO source audio with FOA/HOA2/HOA3 IR -> results in FOA/HOA2/HOA3 audio object
+            if cfg.format == "FOA":
+                x = reverb_foa(x, IR, mode=None)
+            elif cfg.format == "HOA2":
+                x = reverb_hoa2(x, IR, mode=None)
+            elif cfg.format == "HOA3":
+                x = reverb_hoa3(x, IR, mode=None)
+        else:
+            # convert MONO to ISM1
+            x_ism = audio.ObjectBasedAudio("ISM1")  # ISM with 1 channel
+            x_ism.fs = cfg.fs
+            x_ism.audio = x.audio.copy()
+
+            # convert azimuth information in case of moving object
+            if isinstance(source_azi, str):
+                if ":" in source_azi:
+                    # convert into array (initial_value:step:stop_value)
+                    start_str, step_str, stop_str = source_azi.split(":")
+                    start = float(eval(start_str))
+                    step = float(eval(step_str))
+                    stop = float(eval(stop_str))
+                    azi = np.arange(start, stop, step)
+
+                    # adjust length to N_frames
+                    if len(azi) > N_frames:
+                        azi = azi[:N_frames]
+                    elif len(azi) < N_frames:
+                        azi = np.append(azi, np.full(N_frames - len(azi), azi[-1]))
+                else:
+                    # replicate static azimuth value N_frames times
+                    azi = np.repeat(float(eval(source_azi)), N_frames)
+            else:
+                # replicate static azimuth value N_frames times
+                azi = np.repeat(float(source_azi), N_frames)
+
+            # convert azimuth from 0 .. 360 to -180 .. +180
+            azi = (azi + 180) % 360 - 180
+
+            # check if azimuth is from -180 .. +180
+            if any(azi > 180) or any(azi < -180):
+                logger.error(
+                    f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}"
+                )
+
+            # convert elevation information in case of moving object
+            if isinstance(source_ele, str):
+                if ":" in source_ele:
+                    # convert into array (initial_value:step:stop_value)
+                    start_str, step_str, stop_str = source_ele.split(":")
+                    start = float(eval(start_str))
+                    step = float(eval(step_str))
+                    stop = float(eval(stop_str))
+                    ele = np.arange(start, stop, step)
+
+                    # adjust length to N_frames
+                    if len(ele) > N_frames:
+                        ele = ele[:N_frames]
+                    elif len(ele) < N_frames:
+                        ele = np.append(ele, np.full(N_frames - len(ele), ele[-1]))
+
+                else:
+                    # replicate static elevation value N_frames times
+                    ele = np.repeat(float(eval(source_ele)), N_frames)
+            else:
+                # replicate static elevation value N_frames times
+                ele = np.repeat(float(source_ele), N_frames)
+
+            # wrap elevation angle to -90 .. +90
+            ele = ((ele + 90) % 180) - 90
+
+            # check if elevation is from -90 .. +90
+            if any(ele > 90) or any(ele < -90):
+                logger.error(
+                    f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}"
+                )
+
+            # generate radius vector with all values equal to 1.0
+            rad = np.ones(N_frames)
+
+            # arrange all metadata fields column-wise into a matrix
+            x_ism.object_pos.append(np.column_stack((azi, ele, rad)))
+
+            # convert ISM1 object to SBA
+            x_sba = audio.SceneBasedAudio(cfg.format)
+            convert_objectbased(x_ism, x_sba)
+            x = x_sba  # replace x with the SBA object
+
         # add the convolved FOA/HOA2/HOA3 audio source signal to the output signal
         if y.audio is None:
             # add source signal to the array of all source signals
@@ -338,7 +443,7 @@
 
             if source_shift > 0:
                 # insert zeros to the new audio source signal to shift it right
-                y.audio = audioarray.trim_meta(
+                y.audio = audioarray.trim(
                     y.audio, y.fs, limits=[-source_shift, 0], samples=True
                 )
             else:
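
Reviewer note on the recurring with_suffix() change: all five generators hit the same pathlib pitfall. Path.with_suffix() replaces the final suffix rather than appending to it, so the old calls silently dropped the ".wav" part of the metadata filename. A minimal sketch of the difference (pure pathlib, illustrative filename only):

from pathlib import Path

p = Path("out/item1.wav")

# old behaviour: with_suffix() swaps out ".wav", losing it from the name
p.with_suffix(".0.csv")             # Path('out/item1.0.csv')

# fixed behaviour: re-append the original suffix so the name keeps ".wav"
p.with_suffix(p.suffix + ".0.csv")  # Path('out/item1.wav.0.csv')

This restores the pattern the in-code comments require (metadata files ending in .wav.0.csv, .wav.1.csv, ... and .wav.met for MASA).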
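
On the sign fix in generate_masa_items.py: source_shift is now the reference source's end position plus the configured overlap. Assuming the convention that a negative overlap value requests actual overlap (the numbers below are hypothetical, for illustration only):

end_position = {"ref": 6.0}  # hypothetical: reference source ends at 6.0 s
overlap = -1.0               # hypothetical: negative value asks for 1.0 s of overlap

source_shift = end_position["ref"] + overlap  # 5.0 s: starts 1.0 s before the reference ends
# the previous "- overlap" would give 7.0 s, i.e. a 1.0 s gap instead of an overlap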
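
The new non-IR branch in generate_sba_items.py expands an azimuth/elevation spec ("start:step:stop" or a scalar) into one value per 20 ms frame, pads or truncates to the frame count, and wraps the angles before encoding the mono source as an ISM object. A self-contained sketch of that expansion (hypothetical helper name; float() stands in for the patch's eval(), since plain numeric strings are expected):

import numpy as np

def expand_trajectory(spec, n_frames):
    # "start:step:stop" -> one angle per frame; scalar specs are repeated
    if isinstance(spec, str) and ":" in spec:
        start, step, stop = (float(v) for v in spec.split(":"))
        traj = np.arange(start, stop, step)
        if len(traj) < n_frames:
            # hold the last angle once the trajectory is exhausted
            traj = np.append(traj, np.full(n_frames - len(traj), traj[-1]))
        return traj[:n_frames]
    return np.repeat(float(spec), n_frames)

azi = (expand_trajectory("0:1:360", 500) + 180) % 360 - 180  # wrap to -180 .. +180

Wrapping with the modulo up front is what keeps the subsequent range check from firing on specs that sweep past the nominal limits, e.g. "350:1:370".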