diff --git a/README.md b/README.md index 2a488dde801b1074359e12d03ebaa3960f94905c..d718979fbcd534c5e96ba91465579c75056ff1e3 100755 --- a/README.md +++ b/README.md @@ -513,7 +513,7 @@ The following additional executables are needed for the different processing ste | Processing step | Executable | Where to find | |-------------------------------------------------|-----------------------|-------------------------------------------------------------------------------------------------------------| -| Loudness measurement and adjustment | bs1770demo | https://github.com/openitu/STL | +| Loudness measurement and adjustment | bs1770demo | https://github.com/ErikNorvell-Ericsson/STL (Note branch) | | MNRU | p50fbmnru | https://github.com/openitu/STL | | ESDRU | esdru | https://github.com/openitu/STL | | Frame error pattern application | eid-xor | https://github.com/openitu/STL | diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index 16ac8b935e976527f4cafa4098bb7e54f3877d38..4a6fa6e8736f8c54246530cd9cfc45b5e3f89ffb 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -70,7 +70,7 @@ def logging_init(args, cfg): cfg.output_path.joinpath(f"{cfg.name}{LOGGER_SUFFIX}"), mode="w" ) file_handler.setFormatter(logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT)) - file_handler.setLevel(logging.DEBUG if args.debug else logging.INFO) + file_handler.setLevel(logging.DEBUG) logger.addHandler(file_handler) logger.info(f"Processing test configuration file {args.config}") diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 8ca2d2f7669cbe332d44132ef25c426f6d9c7ac5..2186f8411ebbe0e0f0d7234fc5017b24eb3f7ca7 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -253,7 +253,7 @@ def process_audio( logger.debug( f"Applying loudness adjustment to {loudness} LKFS for format {loudness_fmt} using ITU STL bs1770demo" ) - x.audio = loudness_norm(x, loudness, loudness_fmt) + x.audio = loudness_norm(x, loudness, loudness_fmt, logger=logger) """low-pass filtering""" if fc is not None: diff --git a/ivas_processing_scripts/audiotools/wrappers/bs1770.py b/ivas_processing_scripts/audiotools/wrappers/bs1770.py index 20bc86b4bbacd3796a19aa407f266bf262a407a8..a137eeae562bde9ee5a375635678dd9159644318 100755 --- a/ivas_processing_scripts/audiotools/wrappers/bs1770.py +++ b/ivas_processing_scripts/audiotools/wrappers/bs1770.py @@ -47,9 +47,6 @@ from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu from ivas_processing_scripts.constants import DEFAULT_CONFIG_BINARIES from ivas_processing_scripts.utils import find_binary, get_devnull, run -logger = logging.getLogger("__main__") -logger.setLevel(logging.DEBUG) - def bs1770demo( input: audio.Audio, @@ -65,6 +62,8 @@ def bs1770demo( Input audio target_loudness: Optional[float] Desired loudness in LKFS + rms: Optional[bool] + Flag for using rms argument in bs1770demo tool (for low level signals) Returns ------- @@ -85,14 +84,12 @@ def bs1770demo( binary = find_binary("bs1770demo") # checking if the new binary (with '-rms') is used - with TemporaryDirectory() as tmp_dir_test: - tmp_dir_test = Path(tmp_dir_test) - result = sp.run(str(binary), shell=True, stdout=sp.PIPE, stderr=sp.PIPE) - result_stdout = result.stdout.decode("utf-8") - if "-rms" not in result_stdout: - raise RuntimeError( - 'An bs1770demo executable without RMS support (cmdl option "-rms") was detected. Please update the bs1770demo executable. See bin/README.md for details.' - ) + result = sp.run(str(binary), shell=True, stdout=sp.PIPE, stderr=sp.PIPE) + result_stdout = result.stdout.decode("utf-8") + if "-rms" not in result_stdout: + raise RuntimeError( + 'An bs1770demo executable without RMS support (cmdl option "-rms") was detected. Please update the bs1770demo executable. See bin/README.md for details.' + ) if not isinstance(input, audio.BinauralAudio) and not isinstance( input, audio.ChannelBasedAudio @@ -146,8 +143,9 @@ def bs1770demo( # using rms if true if rms: cmd.insert(1, "-rms") + # run command - result = run(cmd, logger=logger) + result = run(cmd) # parse output # we are looking for the (floating-point) number after the search string - '( )' around the number denotes the first group @@ -174,10 +172,12 @@ def get_loudness( ---------- input : Audio Input audio - target_loudness: float + target_loudness: Optional[float] Desired loudness in LKFS - loudness_format: str + loudness_format: Optional[str] Loudness format to render to for loudness computation (default input format if possible) + rms: Optional[bool] + Flag for using rms argument in bs1770demo tool (for low level signals) Returns ------- @@ -212,10 +212,7 @@ def get_loudness( convert.format_conversion(input, tmp) else: tmp.audio = input.audio - if rms: - return bs1770demo(tmp, target_loudness, rms=True) - else: - return bs1770demo(tmp, target_loudness) + return bs1770demo(tmp, target_loudness, rms) def loudness_norm( @@ -223,6 +220,8 @@ def loudness_norm( target_loudness: Optional[float] = -26, loudness_format: Optional[str] = None, rms: Optional[bool] = False, + logger: Optional[logging.Logger] = None, + file_name_logging: Optional[Union[str, Path]] = None, ) -> np.ndarray: """ Iterative loudness normalization using ITU-R BS.1770-4 @@ -237,6 +236,12 @@ def loudness_norm( Desired loudness level in LKFS loudness_format: Optional[str] Loudness format to render to for loudness computation (default input format) + rms: Optional[bool] + Flag for using rms argument in bs1770demo tool (for low level signals) + logger: Optional[logging.Logger] + Logger to log loudness information + file_name_logging: Optional[Union[str, Path]] + Name of processed file for logging information Returns ------- @@ -244,42 +249,56 @@ def loudness_norm( Normalized audio """ - # repeat until convergence of loudness measured_loudness = np.inf - scale_factor = 1 num_iter = 1 + scaled_input = copy.deepcopy(input) + + # save loudness before and after scaling for the logger info + loudness_before, scale_factor_new = get_loudness( + scaled_input, target_loudness, loudness_format, rms + ) + # repeat until convergence of loudness while np.abs(measured_loudness - target_loudness) > 0.5 and num_iter < 10: + # scale input + scaled_input.audio *= scale_factor_new + + # measure loudness and get scaling factor measured_loudness, scale_factor_new = get_loudness( - input, - target_loudness, - loudness_format, - rms=rms, + scaled_input, target_loudness, loudness_format, rms ) - # scale input - input.audio *= scale_factor_new + num_iter += 1 - # update scale factor - scale_factor *= scale_factor_new + loudness_after = measured_loudness - num_iter += 1 + # log loudness before and after adjustment + if logger: + if file_name_logging: + logger.debug( + f"File {file_name_logging} loudness; before: {loudness_before}, after: {loudness_after}" + ) + else: + logger.debug( + f"Loudness; before: {loudness_before}, after: {loudness_after}" + ) if num_iter >= 10: warn( - f"Loudness did not converge to desired value, stopping at: {measured_loudness:.2f}" + f"Loudness did not converge to desired value, stopping at: {loudness_after:.2f}" ) - return input.audio + return scaled_input.audio def scale_files( file_list: list[list[Union[Path, str]]], fmt: str, loudness: float, + loudness_format: Optional[str] = None, fs: Optional[int] = 48000, in_meta: Optional[list] = None, - rms: Optional[bool] = False, + logger: Optional[logging.Logger] = None, ) -> None: """ Scales audio files to desired loudness @@ -292,15 +311,22 @@ def scale_files( Audio format of files in list loudness: float Desired loudness level in LKFS/dBov + loudness_format: Optional[str] + Format for loudness measurement fs: Optional[int] Sampling rate in_meta: Optional[list] Metadata for ISM with same structure as file_list but one layer more for the list of metadata for one file + logger: Optional[logging.Logger] + Logger to log loudness information """ - if fmt.startswith("ISM") and in_meta: - meta_bool = True + if fmt.startswith("ISM"): + if in_meta: + meta_bool = True + else: + raise ValueError("No metadata available for loudness measurement") else: in_meta = copy.copy(file_list) meta_bool = False @@ -314,10 +340,13 @@ def scale_files( audio_obj = audio.fromfile(fmt, file, fs) # adjust loudness - if rms: - scaled_audio = loudness_norm(audio_obj, loudness, rms=True) - else: - scaled_audio = loudness_norm(audio_obj, loudness) + scaled_audio = loudness_norm( + audio_obj, + loudness, + loudness_format, + logger=logger, + file_name_logging=file, + ) # write into file write(file, scaled_audio, audio_obj.fs) diff --git a/ivas_processing_scripts/bin/README.md b/ivas_processing_scripts/bin/README.md index 99b1f15feede2b770aea27866cc4eee765f6cf06..89b09513e3644b9274a9d27a0aabac414410bf1e 100755 --- a/ivas_processing_scripts/bin/README.md +++ b/ivas_processing_scripts/bin/README.md @@ -3,7 +3,7 @@ Necessary additional executables: | Processing step | Executable | Where to find | |-------------------------------------------------|-----------------------|-------------------------------------------------------------------------------------------------------------| -| Loudness measurement and adjustment | bs1770demo | https://github.com/openitu/STL | +| Loudness measurement and adjustment | bs1770demo | https://github.com/ErikNorvell-Ericsson/STL (Note branch) | | MNRU | p50fbmnru | https://github.com/openitu/STL | | ESDRU | esdru | https://github.com/openitu/STL | | Frame error pattern application | eid-xor | https://github.com/openitu/STL | @@ -11,5 +11,5 @@ Necessary additional executables: | Filtering, Resampling | filter | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip | | Random offset/seed generation (necessary for background noise and FER bitstream processing) | random | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip | | JBM network simulator | networkSimulator_g192 | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip | -| MASA rendering (also used in loudness measurement of MASA items) | masaRenderer | https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/TSGS4_122_Athens/Docs/S4-230221.zip | +| MASA rendering (also used in loudness measurement of MASA items) | masaRenderer | https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/TSGS4_122_Athens/Docs/S4-230221.zip | | EVS reference conditions | EVS_cod, EVS_dec | https://www.3gpp.org/ftp/Specs/archive/26_series/26.443/26443-h00.zip | diff --git a/ivas_processing_scripts/processing/chains.py b/ivas_processing_scripts/processing/chains.py index 982760876d30806e33ecdc0a5218524a44cf133b..60b7aa72bc157cc8e412b47ba86f2540759131d5 100755 --- a/ivas_processing_scripts/processing/chains.py +++ b/ivas_processing_scripts/processing/chains.py @@ -192,6 +192,7 @@ def get_processing_chain( # get pre and post processing configurations pre_cfg = getattr(cfg, "preprocessing", {}) + pre2_cfg = getattr(cfg, "preprocessing_2", {}) post_cfg = cfg.postprocessing # default to input values if preprocessing was not requested @@ -383,6 +384,14 @@ def get_processing_chain( raise SystemExit(f"Unknown condition {condition}!") # add postprocessing step based on condition + # if concatenation and splitting do loudness adjustment only on splitted files + if pre2_cfg.get("concatenate_input", False): + loudness_postprocessing = None + loudness_fmt_postprocessing = None + else: + loudness_postprocessing = post_cfg.get("loudness") + loudness_fmt_postprocessing = post_cfg.get("loudness_fmt") + chain["processes"].append( Postprocessing( { @@ -391,8 +400,8 @@ def get_processing_chain( "out_fs": post_cfg.get("fs"), "out_fmt": post_cfg.get("fmt"), "out_cutoff": tmp_lp_cutoff, - "out_loudness": post_cfg.get("loudness"), - "out_loudness_fmt": post_cfg.get("loudness_fmt"), + "out_loudness": loudness_postprocessing, + "out_loudness_fmt": loudness_fmt_postprocessing, "bin_dataset": post_cfg.get("bin_dataset"), "bin_lfe_gain": post_cfg.get("bin_lfe_gain"), "limit": post_cfg.get("limit", True), diff --git a/ivas_processing_scripts/processing/config.py b/ivas_processing_scripts/processing/config.py index 7027ce75a911ffba01676dc2d5ba8c18de95ba89..a90487e6121545eb03322d2af7078c9208e2576d 100755 --- a/ivas_processing_scripts/processing/config.py +++ b/ivas_processing_scripts/processing/config.py @@ -141,8 +141,13 @@ class TestConfig: # validate preprocessing on concatenated file stage if (pre_proc_2 := getattr(cfg, "preprocessing_2", None)) is not None: bg_noise_folder = Path(pre_proc_2["background_noise_path"]).parent - if bg_noise_folder.resolve().absolute() == cfg.input_path.resolve().absolute(): - raise ValueError(f"Background noise file has to be placed outside the input folder!") + if ( + bg_noise_folder.resolve().absolute() + == cfg.input_path.resolve().absolute() + ): + raise ValueError( + "Background noise file has to be placed outside the input folder!" + ) for cond_name, cond_cfg in cfg.get("conditions_to_generate").items(): type = cond_cfg.get("type") diff --git a/ivas_processing_scripts/processing/preprocessing_2.py b/ivas_processing_scripts/processing/preprocessing_2.py index 5d6c51a388c7c532630b186cecb9840fee393567..0da4a2fc4e1cef745326db5fa5858d91e864004a 100644 --- a/ivas_processing_scripts/processing/preprocessing_2.py +++ b/ivas_processing_scripts/processing/preprocessing_2.py @@ -66,6 +66,7 @@ class Preprocessing2(Processing): # add preamble if self.preamble: + logger.debug(f"Add preamble of length {self.preamble}ms") # also apply preamble to ISM metadata if self.in_fmt.startswith("ISM"): # read out old @@ -91,14 +92,30 @@ class Preprocessing2(Processing): # add background noise if self.background_noise: - audio_object.audio = self.add_background_noise(audio_object, in_meta) + logger.debug( + f"Add background noise from file {self.background_noise.get('background_noise_path', 'file missing')} and SNR {self.background_noise.get('snr', 'snr missing')}" + ) + audio_object.audio = self.add_background_noise( + audio_object, in_meta, logger + ) # save file write(out_file, audio_object.audio, fs=audio_object.fs) return - def add_background_noise(self, audio_object: audio.Audio, in_meta) -> np.ndarray: + def add_background_noise( + self, audio_object: audio.Audio, in_meta, logger + ) -> np.ndarray: + # check if SNR and background noise are given + if ( + not self.background_noise["background_noise_path"] + or not Path(self.background_noise["background_noise_path"]).exists() + ): + raise ValueError("Background noise does not exist") + if not self.background_noise["snr"]: + raise ValueError("SNR value needed for using background noise") + # range for random delay range_delay = (1, 2400000) @@ -133,6 +150,7 @@ class Preprocessing2(Processing): out_format = self.out_fmt loudness_signal, _ = get_loudness(audio_object, loudness_format=out_format) + logger.debug(f"Loudness of audio signal: {loudness_signal}LKFS") # compute desired loudness of background noise loudness_noise = loudness_signal - self.background_noise["snr"] @@ -149,8 +167,15 @@ class Preprocessing2(Processing): )[: len(audio_object.audio)] # scale background noise to desired loudness based on output format + logger.debug( + f"Scaling of background noise to {self.background_noise['snr']}dB SNR" + ) noise_object.audio = loudness_norm( - noise_object, loudness_noise, out_format, rms=True + noise_object, + loudness_noise, + out_format, + rms=True, + logger=logger, ) # add array to signal diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 6c4fabc0331d60d7dbdae021ec24778350ae0365..d7ed03934bd1b8c5e602c4f416783a2eea453591 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -324,6 +324,7 @@ def preprocess_2(cfg, logger): def reverse_process_2(cfg, logger): # remove preamble if cfg.pre2.preamble: + logger.info("Remove preamble") remove_preamble(cfg) # reverse concatenation @@ -354,8 +355,10 @@ def reverse_process_2(cfg, logger): out_paths_splits, cfg.postprocessing["fmt"], cfg.postprocessing["loudness"], + cfg.postprocessing.get("loudness_fmt", None), cfg.postprocessing["fs"], out_meta_splits, + logger, ) return