Anjok07 · evezers · Apr 27, 2024 · Apr 27, 2024
diff --git a/lib_v5/spec_utils.py b/lib_v5/spec_utils.py
@@ -271,8 +271,8 @@ def wave_to_spectrogram(wave, hop_length, n_fft, mp, band, is_v51_model=False):
         wave_left = np.asfortranarray(wave[0])
         wave_right = np.asfortranarray(wave[1])
 
-    spec_left = librosa.stft(wave_left, n_fft, hop_length=hop_length)
-    spec_right = librosa.stft(wave_right, n_fft, hop_length=hop_length)
+    spec_left = librosa.stft(wave_left, n_fft=n_fft, hop_length=hop_length)
+    spec_right = librosa.stft(wave_right, n_fft=n_fft, hop_length=hop_length)
 
     spec = np.asfortranarray([spec_left, spec_right])
 
@@ -337,7 +337,7 @@ def cmb_spectrogram_to_wave(spec_m, mp, extra_bins_h=None, extra_bins=None, is_v
                     spec_s *= get_lp_filter_mask(spec_s.shape[1], bp['lpf_start'], bp['lpf_stop'])
                 else:
                     spec_s = fft_lp_filter(spec_s, bp['lpf_start'], bp['lpf_stop'])
-                wave = librosa.resample(spectrogram_to_wave(spec_s, bp['hl'], mp, d, is_v51_model), bp['sr'], sr, res_type=wav_resolution)
+                wave = librosa.resample(spectrogram_to_wave(spec_s, bp['hl'], mp, d, is_v51_model), orig_sr=bp['sr'], target_sr=sr, res_type=wav_resolution)
             else: # mid
                 if is_v51_model:
                     spec_s *= get_hp_filter_mask(spec_s.shape[1], bp['hpf_start'], bp['hpf_stop'] - 1)
@@ -347,7 +347,7 @@ def cmb_spectrogram_to_wave(spec_m, mp, extra_bins_h=None, extra_bins=None, is_v
                     spec_s = fft_lp_filter(spec_s, bp['lpf_start'], bp['lpf_stop'])
 
                 wave2 = np.add(wave, spectrogram_to_wave(spec_s, bp['hl'], mp, d, is_v51_model))
-                wave = librosa.resample(wave2, bp['sr'], sr, res_type=wav_resolution)
+                wave = librosa.resample(wave2, orig_sr=bp['sr'], target_sr=sr, res_type=wav_resolution)
 
     return wave
 
@@ -406,8 +406,8 @@ def wave_to_spectrogram_old(wave, hop_length, n_fft):
     wave_left = np.asfortranarray(wave[0])
     wave_right = np.asfortranarray(wave[1])
 
-    spec_left = librosa.stft(wave_left, n_fft, hop_length=hop_length)
-    spec_right = librosa.stft(wave_right, n_fft, hop_length=hop_length)
+    spec_left = librosa.stft(wave_left, n_fft=n_fft, hop_length=hop_length)
+    spec_right = librosa.stft(wave_right, n_fft=n_fft, hop_length=hop_length)
 
     spec = np.asfortranarray([spec_left, spec_right])
 
@@ -716,7 +716,7 @@ def change_pitch_semitones(y, sr, semitone_shift):
     factor = 2 ** (semitone_shift / 12)  # Convert semitone shift to factor for resampling
     y_pitch_tuned = []
     for y_channel in y:
-        y_pitch_tuned.append(librosa.resample(y_channel, sr, sr*factor, res_type=wav_resolution_float_resampling))
+        y_pitch_tuned.append(librosa.resample(y_channel, orig_sr=sr, target_sr=sr*factor, res_type=wav_resolution_float_resampling))
     y_pitch_tuned = np.array(y_pitch_tuned)
     new_sr = sr * factor
     return y_pitch_tuned, new_sr

diff --git a/requirements.txt b/requirements.txt
@@ -7,7 +7,7 @@ einops==0.6.0
 future==0.18.3
 julius==0.2.7
 kthread==0.2.3
-librosa==0.9.2
+librosa==0.10.1
 llvmlite
 matchering==2.0.6
 ml_collections==0.1.1
@@ -36,7 +36,6 @@ onnx
 onnxruntime
 onnxruntime-gpu
 onnx2pytorch
-SoundFile==0.11.0; sys_platform != 'darwin'
+soundfile==0.12.1; sys_platform != 'darwin'
 PySoundFile==0.9.0.post1; sys_platform == 'darwin'
-Dora==0.0.3
-numpy==1.23.5
+numpy==1.24.2
diff --git a/separate.py b/separate.py
@@ -1104,7 +1104,7 @@ def loading_mix(self):
                 wav_resolution = bp['res_type']
 
             if d == bands_n: # high-end band
-                X_wave[d], _ = librosa.load(audio_file, bp['sr'], False, dtype=np.float32, res_type=wav_resolution)
+                X_wave[d], _ = librosa.load(audio_file, sr=bp['sr'], mono=False, dtype=np.float32, res_type=wav_resolution)
                 X_spec_s[d] = spec_utils.wave_to_spectrogram(X_wave[d], bp['hl'], bp['n_fft'], self.mp, band=d, is_v51_model=self.is_vr_51_model)
 
                 if not np.any(X_wave[d]) and is_mp3:
@@ -1113,7 +1113,7 @@ def loading_mix(self):
                 if X_wave[d].ndim == 1:
                     X_wave[d] = np.asarray([X_wave[d], X_wave[d]])
             else: # lower bands
-                X_wave[d] = librosa.resample(X_wave[d+1], self.mp.param['band'][d+1]['sr'], bp['sr'], res_type=wav_resolution)
+                X_wave[d] = librosa.resample(X_wave[d+1], orig_sr=self.mp.param['band'][d+1]['sr'], target_sr=bp['sr'], res_type=wav_resolution)
                 X_spec_s[d] = spec_utils.wave_to_spectrogram(X_wave[d], bp['hl'], bp['n_fft'], self.mp, band=d, is_v51_model=self.is_vr_51_model)
 
             if d == bands_n and self.high_end_process != 'none':
@@ -1444,7 +1444,7 @@ def loading_mix(X, mp):
             X_wave[d] = X
 
         else: # lower bands
-            X_wave[d] = librosa.resample(X_wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=wav_resolution)
+            X_wave[d] = librosa.resample(X_wave[d+1], orig_sr=mp.param['band'][d+1]['sr'], target_sr=bp['sr'], res_type=wav_resolution)
 
         X_spec_s[d] = spec_utils.wave_to_spectrogram(X_wave[d], bp['hl'], bp['n_fft'], mp, band=d, is_v51_model=True)