I am new to Unity 3D and have developed an application that includes an "automatic" lip-sync feature.
I am following this tutorial:
http://answers.unity3d.com/questions/139323/any-way-of-quotautomaticquot-lip-syncing.html
See the code below:
using UnityEngine;
using System.Collections;
public class lipmovement2: MonoBehaviour
{
/* Class implementing lip synchronisation */
public AudioClip source_clip;
public float[] freqData;
int nSamples = 256;
int fMax = 24000;
public Transform upmouth0_M, upmouth01_L, upmouth02_R, downmouth1_M, downmouth11_L, downmouth12_R;
float volume = 1000;
// Frequency band (Hz) that drives the mouth; widened from the tutorial's
// original 200-800 Hz to catch more of the voice spectrum.
float freqLow = 200;
float freqHigh = 1600;
int sizeFilter = 5;
float[] filter;
float filterSum;
int posFilter = 0;
int qSample = 0;
int video_Length, secCounter;
float y0, y1;
void OnEnable ()
{
secCounter = 0;
// Cache the resting y positions. All upper-lip transforms (and all lower-lip
// transforms) are assumed to share the same baseline y, so one read of each
// is enough; the original repeated assignments just overwrote one another.
y0 = upmouth0_M.localPosition.y;
y1 = downmouth1_M.localPosition.y;
freqData = new float[nSamples];
// Use the recorded voice clip, keeping a reference in source_clip so its
// length can be read below (it was never assigned before, so reading
// source_clip.length could throw).
source_clip = Rec_voice.instance.voiceFeed.clip;
GetComponent<AudioSource> ().clip = source_clip;
GetComponent<AudioSource> ().Play ();
video_Length = Mathf.CeilToInt (source_clip.length);
}
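// Returns the summed spectrum energy between fLow and fHigh (in Hz)
// for the audio currently playing on this AudioSource.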
float BandVol (float fLow, float fHigh)
{
fLow = Mathf.Clamp (fLow, 20, fMax);
fHigh = Mathf.Clamp (fHigh, fLow, fMax);
GetComponent<AudioSource> ().GetSpectrumData (freqData, 0, FFTWindow.BlackmanHarris);
int n1 = Mathf.FloorToInt (fLow * nSamples / fMax);
int n2 = Mathf.FloorToInt (fHigh * nSamples / fMax);
float sum = 0;
for (int i = n1; i <= n2; i++) {
sum += freqData [i]; // accumulate across the band (was overwritten each iteration)
}
return sum;
}
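// Smooths a value with a circular-buffer moving average of the last
// sizeFilter samples; filterSum keeps a running total to avoid re-summing.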
float MovingAverage (float sample)
{
if (qSample == 0)
filter = new float[sizeFilter];
filterSum += sample - filter [posFilter];
filter [posFilter++] = sample;
if (posFilter > qSample) {
qSample = posFilter;
}
posFilter = posFilter % sizeFilter;
return filterSum / qSample;
}
void Start ()
{
// Initialisation lives in OnEnable so it reruns whenever the component is
// re-enabled with a new clip; nothing is needed here.
}
float limValue;
// Update is called once per frame
void Update ()
{
float band_vol = BandVol (freqLow, freqHigh);
float val = MovingAverage (band_vol) * volume;
// Clamp the smoothed, amplified band volume so the lips cannot open too far.
limValue = Mathf.Clamp (val, 0, 25f);
if (Input.GetKeyDown (KeyCode.Escape)) {
Application.Quit ();
}
}
void LateUpdate ()
{
upmouth0_M.localPosition = new Vector3 (upmouth0_M.localPosition.x, y0 - limValue, upmouth0_M.localPosition.z);
upmouth01_L.localPosition = new Vector3 (upmouth01_L.localPosition.x, y0 - limValue, upmouth01_L.localPosition.z);
upmouth02_R.localPosition = new Vector3 (upmouth02_R.localPosition.x, y0 - limValue, upmouth02_R.localPosition.z);
downmouth1_M.localPosition = new Vector3 (downmouth1_M.localPosition.x, y1 + limValue, downmouth1_M.localPosition.z);
downmouth11_L.localPosition = new Vector3 (downmouth11_L.localPosition.x, y1 + limValue, downmouth11_L.localPosition.z);
downmouth12_R.localPosition = new Vector3 (downmouth12_R.localPosition.x, y1 + limValue, downmouth12_R.localPosition.z);
}
}
I am facing some problems here, as follows:
1) How can I detect only the human voice? If there are other sounds, such as music, they are detected too; how can I stop that? I want the lips to sync only to a human voice.
2) When I record close to the device it works perfectly, but if the distance is a little farther away, the lips no longer sync.
So where am I going wrong, and how can I fix the problems above?
Answer 0 (score: 1)
2) The sound level picked up by the microphone decreases with distance, so each frequency band carries less energy (i.e., GetSpectrumData returns smaller values). If you increase the value of the 'volume' parameter, val in Update() becomes larger
"id": "{driveItem-id}",
"name": "{file-name}",
"file": {
"hashes": {
"quickXorHash": "YY1FIiSDCS9hcAptSPs7prNdf5A="
},
"mimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
},
"size": 22750,
"webUrl": "https://{domain}/{path}/_layouts/WopiFrame.aspx?sourcedoc{id}&file={file-name}&action=default",
}
and the lips will move farther along the y-axis.
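One way to make this robust to microphone distance, rather than hand-tuning 'volume', is to normalise the smoothed band volume by a slowly decaying running peak. Below is a minimal C# sketch of that idea, meant to be pasted into the same MonoBehaviour; NormalizedLipValue, runningPeak, peakDecay and mouthRange are hypothetical names, not part of the original script or any Unity API.

float runningPeak = 0.0001f; // loudest smoothed value seen so far (never zero)
const float peakDecay = 0.999f; // per-frame decay so the gain can re-adapt over time
const float mouthRange = 25f; // maximum lip displacement, matching the clamp in Update()

float NormalizedLipValue (float smoothedBandVol)
{
// Track the running peak, letting it decay slowly so the gain re-adapts
// if the speaker moves closer to or farther from the microphone.
runningPeak = Mathf.Max (smoothedBandVol, runningPeak * peakDecay);
// Map onto 0..mouthRange relative to the peak: the mouth then travels a
// similar distance regardless of the absolute recording level.
return Mathf.Clamp01 (smoothedBandVol / runningPeak) * mouthRange;
}

In Update() you would then use limValue = NormalizedLipValue (MovingAverage (band_vol)); in place of the fixed 'volume' multiplication.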
1) A simple algorithm could look only at the frequency data and classify the input as speech when there is enough energy in the lower band (say 0-1000 Hz) relative to the whole spectrum (say 0-16000 Hz). That could keep the algorithm from lip-syncing to random noise. For more advanced needs I would implement the MFCC algorithm, train it on common phonemes, and only lip-sync when the MFCCs computed from the recorded audio stream are close enough to the training data.
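As a rough illustration of that simple heuristic, the sketch below (written in the style of the script above) gates lip movement on the share of spectral energy below 1000 Hz; IsLikelySpeech and both thresholds (0.001f and 0.5f) are made-up starting points to tune, not values given in the answer.

bool IsLikelySpeech ()
{
GetComponent<AudioSource> ().GetSpectrumData (freqData, 0, FFTWindow.BlackmanHarris);
// Index of the spectrum bin closest to 1000 Hz (bins span 0..fMax over nSamples).
int lowEnd = Mathf.FloorToInt (1000f * nSamples / fMax);
float lowSum = 0f, totalSum = 0f;
for (int i = 0; i < nSamples; i++) {
totalSum += freqData [i];
if (i <= lowEnd)
lowSum += freqData [i];
}
// Require some minimum overall energy and a low-frequency-dominated spectrum.
return totalSum > 0.001f && lowSum / totalSum > 0.5f;
}

Update() could then skip moving the lips (e.g., force limValue toward 0) whenever IsLikelySpeech() returns false, so music or broadband noise no longer drives the mouth.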