inference.py: add bias removal (denoiser) to the inference pipeline

rafaelvalle · rafaelvalle · commit 60674be9b02b · 2019-03-15T16:46:05.000-07:00
diff --git a/inference.py b/inference.py
@@ -12,10 +12,10 @@
 #        names of its contributors may be used to endorse or promote products
 #        derived from this software without specific prior written permission.
 #
-#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-#  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-#  DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+#  ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 #  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 #  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 #  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
@@ -28,9 +28,11 @@
 from scipy.io.wavfile import write
 import torch
 from mel2samp import files_to_list, MAX_WAV_VALUE
+from denoiser import Denoiser
 
 
-def main(mel_files, waveglow_path, sigma, output_dir, sampling_rate, is_fp16):
+def main(mel_files, waveglow_path, sigma, output_dir, sampling_rate, is_fp16,
+         denoiser_strength):
     mel_files = files_to_list(mel_files)
     waveglow = torch.load(waveglow_path)['model']
     waveglow = waveglow.remove_weightnorm(waveglow)
@@ -40,21 +42,29 @@ def main(mel_files, waveglow_path, sigma, output_dir, sampling_rate, is_fp16):
         for k in waveglow.convinv:
             k.float()
 
+    if denoiser_strength > 0:
+        denoiser = Denoiser(waveglow).cuda()
+
     for i, file_path in enumerate(mel_files):
         file_name = os.path.splitext(os.path.basename(file_path))[0]
         mel = torch.load(file_path)
         mel = torch.autograd.Variable(mel.cuda())
         mel = torch.unsqueeze(mel, 0)
         mel = mel.half() if is_fp16 else mel
         with torch.no_grad():
-            audio = MAX_WAV_VALUE*waveglow.infer(mel, sigma=sigma)[0]
+            audio = waveglow.infer(mel, sigma=sigma)
+            if denoiser_strength > 0:
+                audio = denoiser(audio, denoiser_strength)
+            audio = audio * MAX_WAV_VALUE
+        audio = audio.squeeze()
         audio = audio.cpu().numpy()
         audio = audio.astype('int16')
         audio_path = os.path.join(
             output_dir, "{}_synthesis.wav".format(file_name))
         write(audio_path, sampling_rate, audio)
         print(audio_path)
 
+
 if __name__ == "__main__":
     import argparse
 
@@ -66,8 +76,10 @@ def main(mel_files, waveglow_path, sigma, output_dir, sampling_rate, is_fp16):
     parser.add_argument("-s", "--sigma", default=1.0, type=float)
     parser.add_argument("--sampling_rate", default=22050, type=int)
     parser.add_argument("--is_fp16", action="store_true")
+    parser.add_argument("-d", "--denoiser_strength", default=0.0, type=float,
+                        help='Removes model bias. Start with 0.1 and adjust')
 
     args = parser.parse_args()
 
-    main(args.filelist_path, args.waveglow_path, args.sigma,
-         args.output_dir, args.sampling_rate, args.is_fp16)
+    main(args.filelist_path, args.waveglow_path, args.sigma, args.output_dir,
+         args.sampling_rate, args.is_fp16, args.denoiser_strength)