Speech recognition and sound comparison with musicg

Date: 2012-09-30 23:04:59

Tags: android speech-recognition speech

I am trying to build an Android application using speech recognition, but unfortunately Google does not support my language (Macedonian), so instead I am trying to compare two recordings.

I am using http://code.google.com/p/musicg/ to record and compare speech, and I am stuck on initializing the settings used for detecting speech. Can someone tell me how to rewrite this init function so that it detects speech? This is very important to me... or suggest some other way to do it.

This is the init for whistle detection:

            // settings for detecting a whistle

            minFrequency = 600.0f;
            maxFrequency = Double.MAX_VALUE;

            minIntensity = 100.0f;
            maxIntensity = 100000.0f;

            minStandardDeviation = 0.1f;
            maxStandardDeviation = 1.0f;

            highPass = 500;
            lowPass = 10000;

            minNumZeroCross = 50;
            maxNumZeroCross = 200;

            numRobust = 10;
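
Just to illustrate what I am asking for, these are the kinds of values I imagine would have to change for speech; the numbers below are only my guesses based on typical speech frequencies (fundamentals roughly 85-300 Hz, most energy below ~4 kHz), not values taken from musicg:

            // guessed settings for detecting speech - illustrative only, not tested
            minFrequency = 85.0f;            // lower bound of typical speech fundamentals (Hz)
            maxFrequency = 4000.0f;          // most speech energy sits below ~4 kHz

            minIntensity = 100.0f;           // kept from the whistle preset
            maxIntensity = 100000.0f;

            minStandardDeviation = 0.1f;     // speech is less tonal than a whistle,
            maxStandardDeviation = 1.0f;     // so these bounds may need to be relaxed

            highPass = 100;                  // band-pass roughly matching the speech band (Hz)
            lowPass = 4000;

            minNumZeroCross = 5;             // far fewer zero crossings than a whistle;
            maxNumZeroCross = 100;           // tune experimentally

            numRobust = 2;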

3 Answers:

Answer 0 (score: 1)

My understanding is that the musicg DetectionApi is only meant to analyze a single chunk of sound and tell you whether it contains a particular type of sound, as in the whistle and clap examples that ship with the API: is it a clap / is it a whistle.

With musicg, the best you could probably do is recognize whether a sound is a voice at all... and even that is probably beyond what the DetectionApi offers.
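
For reference, feeding a clip to one of those detectors looks roughly like this (written from memory of the musicg examples, so double-check the class and method names against the library source):

    // sketch of the musicg detection API; verify the names against the library
    Wave wave = new Wave("/sdcard/clip.wav");
    WhistleApi whistleApi = new WhistleApi(wave.getWaveHeader());
    ClapApi clapApi = new ClapApi(wave.getWaveHeader());

    boolean isWhistle = whistleApi.isWhistle(wave.getBytes());
    boolean isClap = clapApi.isClap(wave.getBytes());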

Since you say the Google API doesn't support Macedonian, maybe you could give Pocketsphinx a try, as mentioned in this stackoverflow article.

Answer 1 (score: 0)

First of all, all you have to do is save the recorded sound to a WAV file; after that it is easy to use the fingerprint classes from the API: https://code.google.com/p/musicg/source/browse/#git%2Fsrc%2Fcom%2Fmusicg%2Ffingerprint
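
At its core the comparison is just two Wave objects and one FingerprintSimilarity call, something like this (a minimal sketch; the file paths are placeholders):

    // minimal fingerprint comparison between two WAV files (paths are placeholders)
    Wave recorded = new Wave("/sdcard/HQProtocol/sound/recorded.wav");
    Wave reference = new Wave("/sdcard/HQProtocol/sound/reference.wav");

    FingerprintSimilarity fs = recorded.getFingerprintSimilarity(reference);
    float similarity = fs.getSimilarity(); // higher value = more similar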

Here is how I compare a temporarily recorded WAV clip against all the WAV sounds in my database.

public Cursor FP(String recordedClip, Context context) {

    Baza baza = new Baza(context);

    Cursor allSound = baza.getAllProtocolsForSoundCheck();

    List<Protocol> protocols = new ArrayList<Protocol>();
    int PID =-1;

    Log.d("broj",allSound.getCount()+"");

    for (int i = 0; i < allSound.getCount(); i++) {


        Protocol protocol = new Protocol();
        allSound.moveToNext();
        protocol.setSoundPath(allSound.getString(4));
        protocol.setId(Integer.parseInt(allSound.getString(1)));
        protocols.add(protocol);

        Log.d("brojProtocol",allSound.getString(2)+" ");
        baza.updateProtocolsSoundSimilarity(protocol.getId(), (float) -1);
    }

    Wave record = new Wave(recordedClip);

    List<Wave> waves = new ArrayList<Wave>();

    if (protocols != null) {
        for (int i = 0; i < protocols.size(); i++) {
            waves.add(new Wave(protocols.get(i).getSoundPath()));
        }
    }

    for (int i = 0; i < waves.size(); i++) {

        // compute the fingerprint similarity once and reuse it for logging and storage
        float similarity = record.getFingerprintSimilarity(waves.get(i))
                .getSimilarity();

        Log.d("similarity", similarity + "");

        baza.updateProtocolsSoundSimilarity(protocols.get(i).getId(), similarity);
    }

    Cursor similarCursor = baza.getSimilarProtocols();
    similarCursor.moveToFirst();
    TransferClass protocolForTransfer = new TransferClass();
    protocolForTransfer.setId(Integer.parseInt(similarCursor.getString(1)));
    protocolForTransfer.setName(similarCursor.getString(2));

    Log.d("passobj",protocolForTransfer.getName()+" "+protocolForTransfer.getId());
//  return protocolForTransfer;

    return similarCursor;
}

Answer 2 (score: 0)

Here is how I save the temporarily recorded sound in WAV format:

public class RecorderActivity {

    private static final int RECORDER_BPP = 16;
    private static final String AUDIO_RECORDER_FILE_EXT_WAV = ".wav";
    private static final String AUDIO_RECORDER_FOLDER = "HQProtocol/sound";
    private static final String AUDIO_RECORDER_TEMP_FILE = "record_temp.raw";
    private String AUDIO_RECORDER_FILE = "";
    private static final int RECORDER_SAMPLERATE = 8000;
    private static final int RECORDER_CHANNELS = AudioFormat.CHANNEL_IN_MONO;
    private static final int RECORDER_AUDIO_ENCODING = AudioFormat.ENCODING_PCM_16BIT;

    private RealDoubleFFT transformer;

    EndPointDetection endpoint;

    int blockSize = 256;

    private AudioRecord recorder = null;
    private int bufferSize = 0;
    private RecorderAsynctask recordingThread = null;
    private boolean isRecording = false;

    float tempFloatBuffer[] = new float[3];
    int tempIndex = 0;
    int totalReadBytes = 0;

    ImageView imageView;
    Bitmap bitmap;
    Canvas canvas;
    Paint paint;

    Context con;

    RecorderActivity(String file, Context con, ImageView image) {
        AUDIO_RECORDER_FILE = file;
        this.con = con;

        this.imageView = image;
        bitmap = Bitmap.createBitmap((int) 256, (int) 100,
                Bitmap.Config.ARGB_8888);
        canvas = new Canvas(bitmap);
        paint = new Paint();
        paint.setStrokeWidth(5);
        paint.setColor(Color.BLUE);
        imageView.setImageBitmap(bitmap);

        transformer = new RealDoubleFFT(256);

        bufferSize = AudioRecord.getMinBufferSize(RECORDER_SAMPLERATE,
                RECORDER_CHANNELS, RECORDER_AUDIO_ENCODING);

    }

    public String getFilename() {
        String filepath = Environment.getExternalStorageDirectory().getPath();
        File file = new File(filepath, AUDIO_RECORDER_FOLDER);

        if (!file.exists()) {
            file.mkdirs();
        }

        return (file.getAbsolutePath() + "/" + AUDIO_RECORDER_FILE + AUDIO_RECORDER_FILE_EXT_WAV);
    }

    private String getTempFilename() {
        String filepath = Environment.getExternalStorageDirectory().getPath();
        File file = new File(filepath, AUDIO_RECORDER_FOLDER);

        if (!file.exists()) {
            file.mkdirs();
        }

        File tempFile = new File(file, AUDIO_RECORDER_TEMP_FILE); // look in the recorder folder, not the storage root

        if (tempFile.exists())
            tempFile.delete();

        return (file.getAbsolutePath() + "/" + AUDIO_RECORDER_TEMP_FILE);
    }

    public void startRecording() {
        recorder = new AudioRecord(MediaRecorder.AudioSource.MIC,
                RECORDER_SAMPLERATE, RECORDER_CHANNELS,
                RECORDER_AUDIO_ENCODING, bufferSize);

        recorder.startRecording();

        isRecording = true;

        recordingThread = new RecorderAsynctask();
        recordingThread.execute(this);

    }

    class RecorderAsynctask extends AsyncTask<RecorderActivity, double[], Void> {

        public void shareLockedfuntionProgreesUpdate(double[] fttrezult) {

            publishProgress(fttrezult);

        }

        @Override
        protected Void doInBackground(RecorderActivity... params) {
            // TODO Auto-generated method stub

            byte data[] = new byte[bufferSize];
            String filename = getTempFilename();
            FileOutputStream os = null;

            try {
                os = new FileOutputStream(filename);
            } catch (FileNotFoundException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }

            double[] toTransform = new double[blockSize];

            if (null != os) {
                while (isRecording) {
                    // sampleRateTextField.setText(recorder.getSampleRate());

                    int bufferReadResult = recorder.read(data, 0, blockSize);

                    for (int i = 0; i < blockSize && i < bufferReadResult; i++) {
                        toTransform[i] = (double) data[i] / 32768.0; // signed 16 bit
                    }

                    transformer.ft(toTransform);
                    publishProgress(toTransform);

                    // only write the bytes that were actually read in this pass
                    if (bufferReadResult != AudioRecord.ERROR_INVALID_OPERATION
                            && bufferReadResult > 0) {
                        try {
                            os.write(data, 0, bufferReadResult);
                            tempIndex++;
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }

                }

                try {
                    os.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }

            return null;
        }

        @Override
        protected void onProgressUpdate(double[]... toTransform) {
            canvas.drawColor(Color.GRAY);
            Paint p = new Paint();
            for (int i = 0; i < toTransform[0].length; i++) {

                int x = i;
                int downy = (int) (100 - (toTransform[0][i] * 10));
                int upy = 100;
                p.setColor(Color.rgb(downy % 256, i % 256, upy % 256));
                canvas.drawLine(x, upy, x, downy, p);

            }
            imageView.invalidate();
        }

    }

    public void writeAudioDataToFile(RecorderAsynctask asyntask) {
        byte data[] = new byte[bufferSize];
        String filename = getTempFilename();
        FileOutputStream os = null;

        try {
            os = new FileOutputStream(filename);
        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        double[] toTransform = new double[256];

        if (null != os) {
            while (isRecording) {
                // sampleRateTextField.setText(recorder.getSampleRate());

                int bufferReadResult = recorder.read(data, 0, 256);

                for (int i = 0; i < 256 && i < bufferReadResult; i++) {
                    toTransform[i] = (double) data[i] / 32768.0; // signed 16 bit
                }

                transformer.ft(toTransform);
                asyntask.shareLockedfuntionProgreesUpdate(toTransform);

                // only write the bytes that were actually read in this pass
                if (bufferReadResult != AudioRecord.ERROR_INVALID_OPERATION
                        && bufferReadResult > 0) {
                    try {
                        os.write(data, 0, bufferReadResult);
                        tempIndex++;
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }

            }

            try {
                os.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public void stopRecording() {
        if (null != recorder) {
            isRecording = false;

            recorder.stop();
            recorder.release();

            recorder = null;
            recordingThread = null;
        }

        copyWaveFile(getTempFilename(), getFilename());
        deleteTempFile();
    }

    private void deleteTempFile() {
        File file = new File(getTempFilename());

        file.delete();
    }

    private void copyWaveFile(String inFilename, String outFilename) {
        FileInputStream in = null;
        FileOutputStream out = null;
        long totalAudioLen = 0;
        long totalDataLen = totalAudioLen + 36;
        long longSampleRate = RECORDER_SAMPLERATE;
        int channels = 1;
        long byteRate = RECORDER_BPP * RECORDER_SAMPLERATE * channels / 8;

        byte[] data = new byte[bufferSize];

        try {
            in = new FileInputStream(inFilename);
            out = new FileOutputStream(outFilename);
            totalAudioLen = in.getChannel().size();
            totalDataLen = totalAudioLen + 36;

            WriteWaveFileHeader(out, totalAudioLen, totalDataLen,
                    longSampleRate, channels, byteRate);

            while (in.read(data) != -1) {
                out.write(data);
            }

            in.close();
            out.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    private void WriteWaveFileHeader(FileOutputStream out, long totalAudioLen,
            long totalDataLen, long longSampleRate, int channels, long byteRate)
            throws IOException {

        byte[] header = new byte[44];

        header[0] = 'R'; // RIFF/WAVE header
        header[1] = 'I';
        header[2] = 'F';
        header[3] = 'F';
        header[4] = (byte) (totalDataLen & 0xff);
        header[5] = (byte) ((totalDataLen >> 8) & 0xff);
        header[6] = (byte) ((totalDataLen >> 16) & 0xff);
        header[7] = (byte) ((totalDataLen >> 24) & 0xff);
        header[8] = 'W';
        header[9] = 'A';
        header[10] = 'V';
        header[11] = 'E';
        header[12] = 'f'; // 'fmt ' chunk
        header[13] = 'm';
        header[14] = 't';
        header[15] = ' ';
        header[16] = 16; // 4 bytes: size of 'fmt ' chunk
        header[17] = 0;
        header[18] = 0;
        header[19] = 0;
        header[20] = 1; // format = 1
        header[21] = 0;
        header[22] = (byte) channels;
        header[23] = 0;
        header[24] = (byte) (longSampleRate & 0xff);
        header[25] = (byte) ((longSampleRate >> 8) & 0xff);
        header[26] = (byte) ((longSampleRate >> 16) & 0xff);
        header[27] = (byte) ((longSampleRate >> 24) & 0xff);
        header[28] = (byte) (byteRate & 0xff);
        header[29] = (byte) ((byteRate >> 8) & 0xff);
        header[30] = (byte) ((byteRate >> 16) & 0xff);
        header[31] = (byte) ((byteRate >> 24) & 0xff);
        header[32] = (byte) (channels * RECORDER_BPP / 8); // block align = channels * bits per sample / 8
        header[33] = 0;
        header[34] = RECORDER_BPP; // bits per sample
        header[35] = 0;
        header[36] = 'd';
        header[37] = 'a';
        header[38] = 't';
        header[39] = 'a';
        header[40] = (byte) (totalAudioLen & 0xff);
        header[41] = (byte) ((totalAudioLen >> 8) & 0xff);
        header[42] = (byte) ((totalAudioLen >> 16) & 0xff);
        header[43] = (byte) ((totalAudioLen >> 24) & 0xff);

        out.write(header, 0, 44);
    }

    public void closeThreadIfisnot() {
        if (recordingThread != null) {
            recordingThread.cancel(true);
        }
    }
}
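
Putting it together, the flow is roughly: record, stop (which converts the raw temp file into a WAV), then run the saved file through the FP(...) comparison from my other answer. A rough sketch of the glue code (the Activity/ImageView wiring is assumed):

    // hypothetical glue code tying the recorder to the fingerprint comparison
    RecorderActivity recorder = new RecorderActivity("query_clip", this, imageView); // 'this' assumed to be an Activity

    recorder.startRecording();
    // ... let the user speak for a moment ...
    recorder.stopRecording();   // writes <folder>/query_clip.wav and deletes the raw temp file

    // FP(...) is the comparison method from the previous answer
    Cursor ranked = FP(recorder.getFilename(), this);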