每当OpenCV人脸检测返回边界框时,我都会通过PyAudio播放一个wav音频文件。也就是说,边界框的坐标每更新一次,我就向PyAudio输出流写入一块wav数据。音频流确实如我所愿,在每次检测到人脸时播放该音频,但在向流中写入数据仅几秒钟之后,声音就变得非常小,几乎听不到了。
我是在while循环中把音频数据写入流的(见下面完整代码中的 stream.write(data)):
完整代码:
import cv2
import numpy as np
import pyaudio
import wave
import sys
# Number of audio frames read from the wav file per stream write.
CHUNK = 1024

# Raw string: the original literal relied on "\l" being an unrecognised
# escape sequence, which newer Python versions warn about. The resulting
# path is unchanged.
wf = wave.open(r"audio files\light.wav", 'rb')
print(wf.getframerate())

# Instantiate PyAudio and open an output stream whose sample format,
# channel count and sample rate are taken from the wav header.
p = pyaudio.PyAudio()
stream = p.open(
    format=p.get_format_from_width(wf.getsampwidth()),
    channels=wf.getnchannels(),
    rate=wf.getframerate(),
    output=True,
)

# NOTE: no frames are read here. The detection loop reads each chunk right
# before writing it; reading one at setup time would advance the file and
# silently drop the first CHUNK frames of the clip.
# Haar cascade classifiers for faces and eyes, and the default camera.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')
cap = cv2.VideoCapture(0)

# Main loop: grab a frame, detect faces, and for every detected face draw its
# bounding box and push one chunk of wav audio to the output stream.
while True:
    ret, img = cap.read()
    if not ret:
        # No frame was delivered, so img cannot be converted; stop instead
        # of crashing inside cvtColor.
        break

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    for (x, y, w, h) in faces:
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
        roi_gray = gray[y:y + h, x:x + w]
        roi_color = img[y:y + h, x:x + w]

        # Once the wav file is exhausted readframes() returns empty bytes,
        # so nothing audible is written any more. Rewind to the start of
        # the audio data so the clip keeps playing on later detections.
        data = wf.readframes(CHUNK)
        if not data:
            wf.rewind()
            data = wf.readframes(CHUNK)
        if data:  # still empty only if the wav file contains no frames
            stream.write(data)

        # Look for eyes only inside the face region; roi_color is a slice
        # of img, so rectangles drawn on it appear in the displayed frame.
        eyes = eye_cascade.detectMultiScale(roi_gray)
        for (ex, ey, ew, eh) in eyes:
            cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh),
                          (0, 255, 0), 2)

    cv2.imshow('img', img)
    k = cv2.waitKey(30) & 0xff
    if k == 27:  # Esc key quits
        break
# Shut down audio output: stop and close the stream, then terminate PyAudio.
stream.stop_stream()
stream.close()
p.terminate()

# Close the wav reader as well; it was previously left open until exit.
wf.close()

# Release the camera and close the preview window.
cap.release()
cv2.destroyAllWindows()