本篇博客主要介绍如何用 TensorFlow 根据基频(fundamental frequency,f0)构建激励信号。
主要思路如下
- 在有声音(浊音)的部分,激励信号是由 fundamental frequency (f0) 及其 harmonics(谐波)的具体值来构建
- 在静音位置,通过高斯白噪声来构建
具体代码如下
import tensorflow as tf
import numpy as np
class TFSineGen(tf.keras.layers.Layer):
    """Sine-based excitation generator driven by an F0 contour.

    For every time step the layer produces one sine per tone (the
    fundamental plus ``harmonic_num`` overtones), mutes the sines where the
    input F0 is not above ``voiced_threshold`` and adds Gaussian noise.

    Args:
        samp_rate: sampling rate in Hz.
        harmonic_num: number of harmonic overtones (default 0).
        sine_amp: amplitude of the sine waveform (default 0.1).
        noise_std: std of the Gaussian noise added on voiced steps
            (default 0.003).
        voiced_threshold: F0 threshold for the voiced/unvoiced decision
            (default 0).
        flag_for_pulse: stored on the instance only (default True); no
            method of this class reads it, so it does not change the output.
        **kwargs: forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, samp_rate, harmonic_num=0,
                 sine_amp=0.1, noise_std=0.003,
                 voiced_threshold=0,
                 flag_for_pulse=True, **kwargs):
        """Store the generator settings; see the class docstring."""
        super().__init__(**kwargs)
        self.sine_amp = sine_amp
        self.noise_std = noise_std
        self.harmonic_num = harmonic_num
        # Number of generated tones: the fundamental plus the overtones.
        self.dim = self.harmonic_num + 1
        self.sampling_rate = samp_rate
        self.voiced_threshold = voiced_threshold
        self.flag_for_pulse = flag_for_pulse
        # Placeholders kept so the attribute names still exist; the methods
        # below no longer store per-call tensors on the layer.
        self.rad_values1 = None
        self.rand_ini1 = None
        self.cumsum_shift = None

    def _f02uv(self, f0):
        """Return the voiced/unvoiced mask of ``f0``.

        The mask has the shape and dtype of ``f0``: 1 where
        ``f0 > voiced_threshold`` and 0 elsewhere.
        """
        return tf.cast(f0 > self.voiced_threshold, dtype=f0.dtype)

    def _f02sine(self, f0_values):
        """Turn per-tone F0 tracks into unit-amplitude sine waves.

        Args:
            f0_values: tensor of shape (batchsize, length, dim) where dim
                indexes the fundamental tone and its overtones.

        Returns:
            Tensor with the same shape as ``f0_values``.
        """
        dtype = f0_values.dtype

        # Per-step phase increment in cycles. The integer part can be
        # dropped because a shift of 2 * pi * n does not affect the phase.
        rad_values = (f0_values / self.sampling_rate) % 1

        # Dynamic shape, so an unknown batch size is handled as well.
        dyn_shape = tf.shape(f0_values)
        batch_size, num_tones = dyn_shape[0], dyn_shape[2]

        # Random initial phase for the overtones; the fundamental (tone 0)
        # starts at phase 0.
        rand_ini = tf.random.uniform(
            tf.stack([batch_size, num_tones]), dtype=dtype)
        rand_ini = tf.concat(
            [tf.zeros_like(rand_ini[:, :1]), rand_ini[:, 1:]], axis=-1)

        # Add the initial phase to the first time step only.
        rad_values = tf.concat(
            [rad_values[:, :1, :] + rand_ini[:, tf.newaxis, :],
             rad_values[:, 1:, :]],
            axis=1)

        # Keep the accumulated phase small: add -1 at every step where the
        # running sum (mod 1) wraps around. This leaves the sine unchanged
        # because (x - 1) * 2 * pi and x * 2 * pi are the same phase.
        wrapped = tf.cumsum(rad_values, axis=1) % 1
        wrap_mask = (wrapped[:, 1:, :] - wrapped[:, :-1, :]) < 0
        cumsum_shift = tf.concat(
            [tf.zeros_like(rad_values[:, :1, :]),
             -tf.cast(wrap_mask, dtype=dtype)],
            axis=1)

        phase = tf.cumsum(rad_values + cumsum_shift, axis=1)
        return tf.sin(phase * 2 * np.pi)

    def call(self, f0):
        """Generate the excitation signal for an F0 contour.

        Args:
            f0: tensor of shape (batchsize, length, 1); only channel 0 is
                used to build the tones. Steps whose value is not above
                ``voiced_threshold`` are treated as unvoiced.

        Returns:
            Tuple ``(sine_waves, uv, noise, noise_amp)``:
            sine_waves: (batchsize, length, dim) sines muted on unvoiced
                steps, plus noise.
            uv: voiced/unvoiced mask with the shape of ``f0``.
            noise: (batchsize, length, dim) additive noise.
            noise_amp: per-step noise scale with the shape of ``f0``.
        """
        fundamental = f0[:, :, 0]
        # Fundamental followed by its overtones (2 * f0, 3 * f0, ...).
        tones = [fundamental]
        tones.extend(
            fundamental * (idx + 2) for idx in range(self.harmonic_num))
        f0_buf = tf.stack(tones, axis=-1)

        # Generate sine waveforms.
        sine_waves = self._f02sine(f0_buf) * self.sine_amp

        # Generate the voiced/unvoiced mask.
        uv = self._f02uv(f0)

        # Noise scale: noise_std on voiced steps, sine_amp / 3 on unvoiced.
        noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3
        noise = noise_amp * tf.random.normal(
            shape=tf.shape(sine_waves), dtype=sine_waves.dtype)

        # First mute the unvoiced part with uv, then add the noise.
        sine_waves = sine_waves * uv + noise
        return sine_waves, uv, noise, noise_amp
标签:f0,rand,values,self,构建,tf,Tensorflow,sine
From: https://www.cnblogs.com/wuzhitj/p/16719190.html