我想开发一个使用语音识别的 Android 应用,但遗憾的是 Google 的语音识别不支持我的语言(马其顿语),所以我打算改为比较两段录音。
我使用 http://code.google.com/p/musicg/ 来录制并比较声音,但在初始化语音检测的参数设置时卡住了。希望有人能告诉我如何改写下面这个用于语音检测的 init 函数(这对我非常重要),或者提供其他可行的思路。
这是口哨检测的初始化
// settings for detecting a whistle
minFrequency = 600.0f;
maxFrequency = Double.MAX_VALUE;
minIntensity = 100.0f;
maxIntensity = 100000.0f;
minStandardDeviation = 0.1f;
maxStandardDeviation = 1.0f;
highPass = 500;
lowPass = 10000;
minNumZeroCross = 50;
maxNumZeroCross = 200;
numRobust = 10;
答案 0(得分:1)
我的理解是,musicg 的 DetectionApi 只用于分析单个声音块,并判断它是否属于某种特定类型的声音,例如库中自带的口哨(whistle)和拍手(clap)检测 API——即回答"这是拍手吗/这是口哨吗"之类的问题。
使用 musicg,你能做到的最多大概是判断一段声音是否是人声……不过这可能已经超出了 DetectionApi 的能力范围。
既然你说 Google 的 API 不支持马其顿语,也许你可以试试 Pocketsphinx,可以参考这篇提到它的 Stack Overflow 文章。
答案 1(得分:0)
首先,你要做的就是把录制的声音保存为 wav 文件,之后就可以很方便地使用该 API 中的指纹(fingerprint)类:https://code.google.com/p/musicg/source/browse/#git%2Fsrc%2Fcom%2Fmusicg%2Ffingerprint
下面是我进行比较的方式:把一段临时录制的 WAV 声音与我数据库中的所有 wav 声音逐一比较。
public Cursor FP(String recordedClip, Context context) {
Baza baza = new Baza(context);
Cursor allSound = baza.getAllProtocolsForSoundCheck();
List<Protocol> protocols = new ArrayList<Protocol>();
int PID =-1;
Log.d("broj",allSound.getCount()+"");
for (int i = 0; i < allSound.getCount(); i++) {
Protocol protocol = new Protocol();
allSound.moveToNext();
protocol.setSoundPath(allSound.getString(4));
protocol.setId(Integer.parseInt(allSound.getString(1)));
protocols.add(protocol);
Log.d("brojProtocol",allSound.getString(2)+" ");
baza.updateProtocolsSoundSimilarity(protocol.getId(), (float) -1);
}
Wave record = new Wave(recordedClip);
List<Wave> waves = new ArrayList<Wave>();
if (protocols != null) {
for (int i = 0; i < protocols.size(); i++) {
waves.add(new Wave(protocols.get(i).getSoundPath()));
}
}
for (int i = 0; i < waves.size(); i++) {
Log.d("similarity", record.getFingerprintSimilarity(waves.get(i))
.getSimilarity()+"");
baza.updateProtocolsSoundSimilarity(protocols.get(i).getId(),
record.getFingerprintSimilarity(waves.get(i))
.getSimilarity());
}
Cursor similarCursor = baza.getSimilarProtocols();
similarCursor.moveToFirst();
TransferClass protocolForTransfer = new TransferClass();
protocolForTransfer.setId(Integer.parseInt(similarCursor.getString(1)));
protocolForTransfer.setName(similarCursor.getString(2));
Log.d("passobj",protocolForTransfer.getName()+" "+protocolForTransfer.getId());
// return protocolForTransfer;
return similarCursor;
}
答案 2(得分:0)
以下是我将临时录制的声音保存为wav格式的方法:
public class RecorderActivity {
private static final int RECORDER_BPP = 16;
private static final String AUDIO_RECORDER_FILE_EXT_WAV = ".wav";
private static final String AUDIO_RECORDER_FOLDER = "HQProtocol/sound";
private static final String AUDIO_RECORDER_TEMP_FILE = "record_temp.raw";
private String AUDIO_RECORDER_FILE = "";
private static final int RECORDER_SAMPLERATE = 8000;
private static final int RECORDER_CHANNELS = AudioFormat.CHANNEL_IN_MONO;
private static final int RECORDER_AUDIO_ENCODING = AudioFormat.ENCODING_PCM_16BIT;
private RealDoubleFFT transformer;
EndPointDetection endpoint;
int blockSize = 256;
private AudioRecord recorder = null;
private int bufferSize = 0;
private RecorderAsynctask recordingThread = null;
private boolean isRecording = false;
float tempFloatBuffer[] = new float[3];
int tempIndex = 0;
int totalReadBytes = 0;
ImageView imageView;
Bitmap bitmap;
Canvas canvas;
Paint paint;
Context con;
RecorderActivity(String file, Context con, ImageView image) {
AUDIO_RECORDER_FILE = file;
this.con = con;
this.imageView = image;
bitmap = Bitmap.createBitmap((int) 256, (int) 100,
Bitmap.Config.ARGB_8888);
canvas = new Canvas(bitmap);
paint = new Paint();
paint.setStrokeWidth(5);
paint.setColor(Color.BLUE);
imageView.setImageBitmap(bitmap);
transformer = new RealDoubleFFT(256);
bufferSize = AudioRecord.getMinBufferSize(RECORDER_SAMPLERATE,
RECORDER_CHANNELS, RECORDER_AUDIO_ENCODING);
}
public String getFilename() {
String filepath = Environment.getExternalStorageDirectory().getPath();
File file = new File(filepath, AUDIO_RECORDER_FOLDER);
if (!file.exists()) {
file.mkdirs();
}
return (file.getAbsolutePath() + "/" + AUDIO_RECORDER_FILE + AUDIO_RECORDER_FILE_EXT_WAV);
}
private String getTempFilename() {
String filepath = Environment.getExternalStorageDirectory().getPath();
File file = new File(filepath, AUDIO_RECORDER_FOLDER);
if (!file.exists()) {
file.mkdirs();
}
File tempFile = new File(filepath, AUDIO_RECORDER_TEMP_FILE);
if (tempFile.exists())
tempFile.delete();
return (file.getAbsolutePath() + "/" + AUDIO_RECORDER_TEMP_FILE);
}
public void startRecording() {
recorder = new AudioRecord(MediaRecorder.AudioSource.MIC,
RECORDER_SAMPLERATE, RECORDER_CHANNELS,
RECORDER_AUDIO_ENCODING, bufferSize);
recorder.startRecording();
isRecording = true;
recordingThread = new RecorderAsynctask();
recordingThread.execute(this);
}
class RecorderAsynctask extends AsyncTask<RecorderActivity, double[], Void> {
public void shareLockedfuntionProgreesUpdate(double[] fttrezult) {
publishProgress(fttrezult);
}
@Override
protected Void doInBackground(RecorderActivity... params) {
// TODO Auto-generated method stub
byte data[] = new byte[bufferSize];
String filename = getTempFilename();
FileOutputStream os = null;
try {
os = new FileOutputStream(filename);
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
int read = 0;
AudioTrack tempAudioTrack;
double[] toTransform = new double[blockSize];
if (null != os) {
while (isRecording) {
// sampleRateTextField.setText(recorder.getSampleRate());
int bufferReadResult = recorder.read(data, 0, blockSize);
for (int i = 0; i < blockSize && i < bufferReadResult; i++) {
toTransform[i] = (double) data[i] / 32768.0; // signed
// 16
// bit
}
transformer.ft(toTransform);
publishProgress(toTransform);
if (AudioRecord.ERROR_INVALID_OPERATION != read) {
try {
os.write(data);
tempIndex++;
} catch (IOException e) {
e.printStackTrace();
}
}
}
try {
os.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return null;
}
@Override
protected void onProgressUpdate(double[]... toTransform) {
canvas.drawColor(Color.GRAY);
Paint p = new Paint();
for (int i = 0; i < toTransform[0].length; i++) {
int x = i;
int downy = (int) (100 - (toTransform[0][i] * 10));
int upy = 100;
p.setColor(Color.rgb(downy % 256, i % 256, upy % 256));
canvas.drawLine(x, upy, x, downy, p);
}
imageView.invalidate();
}
}
public void writeAudioDataToFile(RecorderAsynctask asyntask) {
byte data[] = new byte[bufferSize];
String filename = getTempFilename();
FileOutputStream os = null;
try {
os = new FileOutputStream(filename);
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
int read = 0;
double[] toTransform = new double[256];
if (null != os) {
while (isRecording) {
// sampleRateTextField.setText(recorder.getSampleRate());
int bufferReadResult = recorder.read(data, 0, 256);
for (int i = 0; i < 256 && i < bufferReadResult; i++) {
toTransform[i] = (double) data[i] / 32768.0; // signed
// 16
// bit
}
transformer.ft(toTransform);
asyntask.shareLockedfuntionProgreesUpdate(toTransform);
if (AudioRecord.ERROR_INVALID_OPERATION != read) {
try {
os.write(data);
tempIndex++;
} catch (IOException e) {
e.printStackTrace();
}
}
}
try {
os.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
public void stopRecording() {
if (null != recorder) {
isRecording = false;
recorder.stop();
recorder.release();
recorder = null;
recordingThread = null;
}
copyWaveFile(getTempFilename(), getFilename());
deleteTempFile();
}
private void deleteTempFile() {
File file = new File(getTempFilename());
file.delete();
}
private void copyWaveFile(String inFilename, String outFilename) {
FileInputStream in = null;
FileOutputStream out = null;
long totalAudioLen = 0;
long totalDataLen = totalAudioLen + 36;
long longSampleRate = RECORDER_SAMPLERATE;
int channels = 1;
long byteRate = RECORDER_BPP * RECORDER_SAMPLERATE * channels / 8;
byte[] data = new byte[bufferSize];
try {
in = new FileInputStream(inFilename);
out = new FileOutputStream(outFilename);
totalAudioLen = in.getChannel().size();
totalDataLen = totalAudioLen + 36;
WriteWaveFileHeader(out, totalAudioLen, totalDataLen,
longSampleRate, channels, byteRate);
while (in.read(data) != -1) {
out.write(data);
}
in.close();
out.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
private void WriteWaveFileHeader(FileOutputStream out, long totalAudioLen,
long totalDataLen, long longSampleRate, int channels, long byteRate)
throws IOException {
byte[] header = new byte[44];
header[0] = 'R'; // RIFF/WAVE header
header[1] = 'I';
header[2] = 'F';
header[3] = 'F';
header[4] = (byte) (totalDataLen & 0xff);
header[5] = (byte) ((totalDataLen >> 8) & 0xff);
header[6] = (byte) ((totalDataLen >> 16) & 0xff);
header[7] = (byte) ((totalDataLen >> 24) & 0xff);
header[8] = 'W';
header[9] = 'A';
header[10] = 'V';
header[11] = 'E';
header[12] = 'f'; // 'fmt ' chunk
header[13] = 'm';
header[14] = 't';
header[15] = ' ';
header[16] = 16; // 4 bytes: size of 'fmt ' chunk
header[17] = 0;
header[18] = 0;
header[19] = 0;
header[20] = 1; // format = 1
header[21] = 0;
header[22] = (byte) channels;
header[23] = 0;
header[24] = (byte) (longSampleRate & 0xff);
header[25] = (byte) ((longSampleRate >> 8) & 0xff);
header[26] = (byte) ((longSampleRate >> 16) & 0xff);
header[27] = (byte) ((longSampleRate >> 24) & 0xff);
header[28] = (byte) (byteRate & 0xff);
header[29] = (byte) ((byteRate >> 8) & 0xff);
header[30] = (byte) ((byteRate >> 16) & 0xff);
header[31] = (byte) ((byteRate >> 24) & 0xff);
header[32] = (byte) (2 * 16 / 8); // block align
header[33] = 0;
header[34] = RECORDER_BPP; // bits per sample
header[35] = 0;
header[36] = 'd';
header[37] = 'a';
header[38] = 't';
header[39] = 'a';
header[40] = (byte) (totalAudioLen & 0xff);
header[41] = (byte) ((totalAudioLen >> 8) & 0xff);
header[42] = (byte) ((totalAudioLen >> 16) & 0xff);
header[43] = (byte) ((totalAudioLen >> 24) & 0xff);
out.write(header, 0, 44);
}
public void closeThreadIfisnot() {
recordingThread.cancel(true);
}
}