I'm working on a problem with OpenCV. Imagine a chart board or plain board with a few pictures on it; the goal is to play a sound whenever the user touches any of the pictures with a colored pen.
So far I have defined a few ROIs with fixed coordinates, and I can play a sound when the user touches the object inside one of those ROIs.
import numpy as np
import time
import cv2
from pygame import mixer
def state_machine(summation, sound):
    # Check whether a blue colored object is present in the ROI:
    # the mask pixels are 255 where blue is detected, so the sum is
    # compared against a fraction of the ROI area as a rough threshold.
    detected = summation > Tiger_thickness[0] * Tiger_thickness[1] * 0.8
    # If present, play the respective sound.
    if detected and sound == 1:
        elephant_s.play()
    elif detected and sound == 2:
        tiger_s.play()
    time.sleep(0.001)
def ROI_analysis(frame, sound):
    # Convert the ROI into the HSV color space
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    # Generate a mask for the blue color range
    mask = cv2.inRange(hsv, blueLower, blueUpper)
    # Sum the white pixels of the mask, i.e. the blue pixels detected in the ROI
    summation = np.sum(mask)
    # Decide whether to play the sound or not
    state_machine(summation, sound)
    return mask
verbose = False  # set to True to visualize the detected blue pixels inside the ROIs
# importing the audio files
mixer.init()
elephant_s = mixer.Sound('elephant8.wav')
tiger_s = mixer.Sound('Tiger4.wav')
# HSV range for detecting blue color
blueLower = (80,150,10)
blueUpper = (120,255,255)
# Frame acquisition from the webcam / USB camera
camera = cv2.VideoCapture(0)
ret,frame = camera.read()
H,W = frame.shape[:2]
kernel = np.ones((7,7),np.uint8)
# reading the image of Elephant and tiger for augmentation.
Tiger = cv2.resize(cv2.imread('Images/tiger.jpg'),(200,100),interpolation=cv2.INTER_CUBIC)
Elephant = cv2.resize(cv2.imread('Images/elephant.jpg'),(200,100),interpolation=cv2.INTER_CUBIC)
# Setting the ROI area for blue color detection
Tiger_center = [np.shape(frame)[1]*2//8,np.shape(frame)[0]*6//8]
Elephant_center = [np.shape(frame)[1]*6//8,np.shape(frame)[0]*6//8]
Tiger_thickness = [200,100]
Tiger_top = [Tiger_center[0]-Tiger_thickness[0]//2,Tiger_center[1]-Tiger_thickness[1]//2]
Tiger_btm = [Tiger_center[0]+Tiger_thickness[0]//2,Tiger_center[1]+Tiger_thickness[1]//2]
Elephant_thickness = [200,100]
Elephant_top = [Elephant_center[0]-Elephant_thickness[0]//2,Elephant_center[1]-Elephant_thickness[1]//2]
Elephant_btm = [Elephant_center[0]+Elephant_thickness[0]//2,Elephant_center[1]+Elephant_thickness[1]//2]
time.sleep(1)
while True:
    # Grab the current frame and mirror it
    ret, frame = camera.read()
    if not ret:
        break
    frame = cv2.flip(frame,1)

    # Selecting the ROI corresponding to the Elephant
    Elephant_ROI = np.copy(frame[Elephant_top[1]:Elephant_btm[1],Elephant_top[0]:Elephant_btm[0]])
    elephant_mask = ROI_analysis(Elephant_ROI,1)

    # Selecting the ROI corresponding to the Tiger
    Tiger_ROI = np.copy(frame[Tiger_top[1]:Tiger_btm[1],Tiger_top[0]:Tiger_btm[0]])
    tiger_mask = ROI_analysis(Tiger_ROI,2)

    # Writing a text label on the frame
    cv2.putText(frame,'CV Sound',(10,30),2,1,(20,20,20),2)

    if verbose:
        # Display the masked ROIs so the detected blue color can be seen
        frame[Elephant_top[1]:Elephant_btm[1],Elephant_top[0]:Elephant_btm[0]] = cv2.bitwise_and(Elephant_ROI, Elephant_ROI, mask=elephant_mask)
        frame[Tiger_top[1]:Tiger_btm[1],Tiger_top[0]:Tiger_btm[0]] = cv2.bitwise_and(Tiger_ROI, Tiger_ROI, mask=tiger_mask)
    else:
        # Augmenting the images of the animals onto the frame
        frame[Elephant_top[1]:Elephant_btm[1],Elephant_top[0]:Elephant_btm[0]] = cv2.addWeighted(Elephant, 1, frame[Elephant_top[1]:Elephant_btm[1],Elephant_top[0]:Elephant_btm[0]], 1, 0)
        frame[Tiger_top[1]:Tiger_btm[1],Tiger_top[0]:Tiger_btm[0]] = cv2.addWeighted(Tiger, 1, frame[Tiger_top[1]:Tiger_btm[1],Tiger_top[0]:Tiger_btm[0]], 1, 0)

    cv2.imshow('Output',frame)
    key = cv2.waitKey(1) & 0xFF
    # if the 'q' key is pressed, stop the loop
    if key == ord("q"):
        break

# cleanup the camera and close any open windows
camera.release()
cv2.destroyAllWindows()
What I need now is to get the sound output by clicking on one of the objects in the image instead.
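I think cv2.setMouseCallback might be the way to do this. Below is a minimal sketch of what I have in mind (the rectangle coordinates are placeholders assuming a 640x480 frame, and the callback name play_on_click is just something I made up; in the real code the rectangles would come from Elephant_top/Elephant_btm and Tiger_top/Tiger_btm), but I'm not sure it's the right approach:

import cv2
from pygame import mixer

mixer.init()
elephant_s = mixer.Sound('elephant8.wav')
tiger_s = mixer.Sound('Tiger4.wav')

# (x1, y1, x2, y2) rectangles; placeholder values assuming a 640x480 frame
regions = {
    'elephant': (380, 310, 580, 410),
    'tiger': (60, 310, 260, 410),
}

def play_on_click(event, x, y, flags, param):
    # Called by OpenCV for every mouse event on the window
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    # Play the sound of whichever region the click landed in
    for name, (x1, y1, x2, y2) in regions.items():
        if x1 <= x <= x2 and y1 <= y <= y2:
            (elephant_s if name == 'elephant' else tiger_s).play()

camera = cv2.VideoCapture(0)
cv2.namedWindow('Output')
cv2.setMouseCallback('Output', play_on_click)

while True:
    ret, frame = camera.read()
    if not ret:
        break
    frame = cv2.flip(frame, 1)
    cv2.imshow('Output', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

camera.release()
cv2.destroyAllWindows()

Since the callback receives pixel coordinates relative to the 'Output' window, the same ROI rectangles used for the color detection above could be reused for this hit test. Is this the right direction, or is there a better way to combine the click handling with the color-based detection?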