I'm working on a problem with OpenCV. Imagine a chart board or plain board with a few pictures on it; the goal is to play a sound whenever the user touches any of the pictures with a colored pen.
So far I have defined a few ROIs with fixed coordinates, and I can play a sound when the user touches the object inside one of those ROIs.
import numpy as np
import time
import cv2
from pygame import mixer
def state_machine(summation, sound):
    # Check whether a blue colored object is present in the ROI:
    # the mask pixels are 255 where blue is detected, so the sum is
    # compared against a fraction of the ROI area as a rough threshold.
    detected = summation > Tiger_thickness[0] * Tiger_thickness[1] * 0.8
    # If present, play the respective sound.
    if detected and sound == 1:
        elephant_s.play()
    elif detected and sound == 2:
        tiger_s.play()
    time.sleep(0.001)
def ROI_analysis(frame, sound):
    # Convert the ROI into the HSV color space
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    # Generate a mask for the blue color range
    mask = cv2.inRange(hsv, blueLower, blueUpper)
    # Sum the white pixels of the mask, i.e. the blue pixels detected in the ROI
    summation = np.sum(mask)
    # Decide whether to play the sound or not
    state_machine(summation, sound)
    return mask
verbose = False  # set to True to visualize the detected blue pixels inside the ROIs
# importing the audio files
mixer.init()
elephant_s = mixer.Sound('elephant8.wav')
tiger_s = mixer.Sound('Tiger4.wav')
# HSV range for detecting blue color
blueLower = (80,150,10)
blueUpper = (120,255,255)
# Frame acquisition from the webcam / USB camera
camera = cv2.VideoCapture(0)
ret,frame = camera.read()
H,W = frame.shape[:2]
kernel = np.ones((7,7),np.uint8)
# reading the image of Elephant and tiger for augmentation.
Tiger = cv2.resize(cv2.imread('Images/tiger.jpg'),(200,100),interpolation=cv2.INTER_CUBIC)
Elephant = cv2.resize(cv2.imread('Images/elephant.jpg'),(200,100),interpolation=cv2.INTER_CUBIC)
# Setting the ROI area for blue color detection
Tiger_center = [np.shape(frame)[1]*2//8,np.shape(frame)[0]*6//8]
Elephant_center = [np.shape(frame)[1]*6//8,np.shape(frame)[0]*6//8]
Tiger_thickness = [200,100]
Tiger_top = [Tiger_center[0]-Tiger_thickness[0]//2,Tiger_center[1]-Tiger_thickness[1]//2]
Tiger_btm = [Tiger_center[0]+Tiger_thickness[0]//2,Tiger_center[1]+Tiger_thickness[1]//2]
Elephant_thickness = [200,100]
Elephant_top = [Elephant_center[0]-Elephant_thickness[0]//2,Elephant_center[1]-Elephant_thickness[1]//2]
Elephant_btm = [Elephant_center[0]+Elephant_thickness[0]//2,Elephant_center[1]+Elephant_thickness[1]//2]
time.sleep(1)
while True:
    # Grab the current frame and mirror it
    ret, frame = camera.read()
    if not ret:
        break
    frame = cv2.flip(frame,1)

    # Selecting the ROI corresponding to the Elephant
    Elephant_ROI = np.copy(frame[Elephant_top[1]:Elephant_btm[1],Elephant_top[0]:Elephant_btm[0]])
    elephant_mask = ROI_analysis(Elephant_ROI,1)

    # Selecting the ROI corresponding to the Tiger
    Tiger_ROI = np.copy(frame[Tiger_top[1]:Tiger_btm[1],Tiger_top[0]:Tiger_btm[0]])
    tiger_mask = ROI_analysis(Tiger_ROI,2)

    # Writing a text label on the frame
    cv2.putText(frame,'CV Sound',(10,30),2,1,(20,20,20),2)

    if verbose:
        # Display the masked ROIs so the detected blue color can be seen
        frame[Elephant_top[1]:Elephant_btm[1],Elephant_top[0]:Elephant_btm[0]] = cv2.bitwise_and(Elephant_ROI, Elephant_ROI, mask=elephant_mask)
        frame[Tiger_top[1]:Tiger_btm[1],Tiger_top[0]:Tiger_btm[0]] = cv2.bitwise_and(Tiger_ROI, Tiger_ROI, mask=tiger_mask)
    else:
        # Augmenting the images of the animals onto the frame
        frame[Elephant_top[1]:Elephant_btm[1],Elephant_top[0]:Elephant_btm[0]] = cv2.addWeighted(Elephant, 1, frame[Elephant_top[1]:Elephant_btm[1],Elephant_top[0]:Elephant_btm[0]], 1, 0)
        frame[Tiger_top[1]:Tiger_btm[1],Tiger_top[0]:Tiger_btm[0]] = cv2.addWeighted(Tiger, 1, frame[Tiger_top[1]:Tiger_btm[1],Tiger_top[0]:Tiger_btm[0]], 1, 0)

    cv2.imshow('Output',frame)
    key = cv2.waitKey(1) & 0xFF
    # if the 'q' key is pressed, stop the loop
    if key == ord("q"):
        break

# cleanup the camera and close any open windows
camera.release()
cv2.destroyAllWindows()
What I need now is to get the sound output by clicking on one of the objects in the image instead.
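I think cv2.setMouseCallback might be the way to do this. Below is a minimal sketch of what I have in mind (the rectangle coordinates are placeholders assuming a 640x480 frame, and the callback name play_on_click is just something I made up; in the real code the rectangles would come from Elephant_top/Elephant_btm and Tiger_top/Tiger_btm), but I'm not sure it's the right approach:

import cv2
from pygame import mixer

mixer.init()
elephant_s = mixer.Sound('elephant8.wav')
tiger_s = mixer.Sound('Tiger4.wav')

# (x1, y1, x2, y2) rectangles; placeholder values assuming a 640x480 frame
regions = {
    'elephant': (380, 310, 580, 410),
    'tiger': (60, 310, 260, 410),
}

def play_on_click(event, x, y, flags, param):
    # Called by OpenCV for every mouse event on the window
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    # Play the sound of whichever region the click landed in
    for name, (x1, y1, x2, y2) in regions.items():
        if x1 <= x <= x2 and y1 <= y <= y2:
            (elephant_s if name == 'elephant' else tiger_s).play()

camera = cv2.VideoCapture(0)
cv2.namedWindow('Output')
cv2.setMouseCallback('Output', play_on_click)

while True:
    ret, frame = camera.read()
    if not ret:
        break
    frame = cv2.flip(frame, 1)
    cv2.imshow('Output', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

camera.release()
cv2.destroyAllWindows()

Since the callback receives pixel coordinates relative to the 'Output' window, the same ROI rectangles used for the color detection above could be reused for this hit test. Is this the right direction, or is there a better way to combine the click handling with the color-based detection?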