Is there a way to synchronize live audio and live video using Python?

Asked: 2020-05-10 04:30:32

Tags: python multithreading sockets audio synchronization

I'm trying to write a 2-person live chat application using Python 3. The code works in the sense that it delivers live audio and live video (as tested by running the server and two clients on the same device). The client code carries most of the logic: it captures the webcam and microphone feeds and sends them through the server, which serves mostly as a handshake/relay between the two clients, with little logic beyond forwarding each client's video and/or audio to the other client, whose client code then plays back the received audio and video.
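
To make the relay idea concrete, the server-side forwarding boils down to something like the sketch below. This is a condensed illustration only, not my actual code (which follows further down); relay, client_a and client_b are made-up names:

from socket import socket, AF_INET, SOCK_STREAM
from threading import Thread

PORT = 3000  # the video relay; the audio relay is the same thing on another port

def relay(src, dst):
    # Forward raw bytes from one client to the other until the sender disconnects.
    while True:
        data = src.recv(4096)
        if not data:
            break
        dst.sendall(data)

server = socket(AF_INET, SOCK_STREAM)
server.bind(("", PORT))
server.listen(2)
client_a, _ = server.accept()
client_b, _ = server.accept()
Thread(target=relay, args=(client_a, client_b), daemon=True).start()
relay(client_b, client_a)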

Fair warning: this code is a bit messy and has not been optimized for performance.

Client code:

import cv2
from socket import socket, AF_INET, SOCK_STREAM
from imutils.video import WebcamVideoStream
import pyaudio
from array import array
from threading import Thread
import numpy as np
import zlib
import struct

HOST = input("Enter Server IP\n")
PORT_VIDEO = 3000
PORT_AUDIO = 4000

BufferSize = 4096
CHUNK=1024
lnF = 640*480*3
FORMAT=pyaudio.paInt16
CHANNELS=2
RATE=44100

def SendAudio():
    # Read PCM chunks from the microphone and send them to the server.
    while True:
        data = stream.read(CHUNK)
        dataChunk = array('h', data)
        vol = max(dataChunk)  # peak level of this chunk (currently unused)
        clientAudioSocket.sendall(data)

def RecieveAudio():
    # Play back audio chunks relayed from the other client.
    while True:
        data = recvallAudio(BufferSize)
        stream.write(data)

def recvallAudio(size):
    # Keep calling recv() until exactly `size` bytes have been collected.
    databytes = b''
    while len(databytes) != size:
        to_read = size - len(databytes)
        if to_read > (4 * CHUNK):
            databytes += clientAudioSocket.recv(4 * CHUNK)
        else:
            databytes += clientAudioSocket.recv(to_read)
    return databytes

def SendFrame():
    # Capture a webcam frame, zlib-compress it and send it with a 4-byte
    # length prefix, splitting large payloads across several sendall() calls.
    while True:
        try:
            frame = wvs.read()
            cv2_im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # converted copy is never used
            frame = cv2.resize(frame, (640, 480))
            frame = np.array(frame, dtype = np.uint8).reshape(1, lnF)
            jpg_as_text = bytearray(frame)

            databytes = zlib.compress(jpg_as_text, 9)
            length = struct.pack('!I', len(databytes))
            bytesToBeSend = b''
            clientVideoSocket.sendall(length)
            while len(databytes) > 0:
                if (5000 * CHUNK) <= len(databytes):
                    bytesToBeSend = databytes[:(5000 * CHUNK)]
                    databytes = databytes[(5000 * CHUNK):]
                    clientVideoSocket.sendall(bytesToBeSend)
                else:
                    bytesToBeSend = databytes
                    clientVideoSocket.sendall(bytesToBeSend)
                    databytes = b''
            # ~ print("##### Data Sent!! #####")
        except:
            continue


def RecieveFrame():
    # Read a length-prefixed, zlib-compressed frame from the server,
    # decompress it and display it with OpenCV.
    while True:
        try:
            lengthbuf = recvallVideo(4)
            length, = struct.unpack('!I', lengthbuf)
            databytes = recvallVideo(length)
            img = zlib.decompress(databytes)
            if len(databytes) == length:
                # ~ print("Recieving Media..")
                # ~ print("Image Frame Size:- {}".format(len(img)))
                img = np.array(list(img))  # bytes -> list of ints (np.frombuffer would avoid this copy)
                img = np.array(img, dtype = np.uint8).reshape(480, 640, 3)
                cv2.imshow("Stream", img)
                if cv2.waitKey(1) == 27:
                    cv2.destroyAllWindows()
            else:
                print("Data CORRUPTED")
        except:
            continue


def recvallVideo(size):
    # Keep calling recv() until exactly `size` bytes have been collected.
    databytes = b''
    while len(databytes) != size:
        to_read = size - len(databytes)
        if to_read > (5000 * CHUNK):
            databytes += clientVideoSocket.recv(5000 * CHUNK)
        else:
            databytes += clientVideoSocket.recv(to_read)
    return databytes



clientVideoSocket = socket(family=AF_INET, type=SOCK_STREAM)
clientVideoSocket.connect((HOST, PORT_VIDEO))
wvs = WebcamVideoStream(0).start()

clientAudioSocket = socket(family=AF_INET, type=SOCK_STREAM)
clientAudioSocket.connect((HOST, PORT_AUDIO))

audio=pyaudio.PyAudio()
stream=audio.open(format=FORMAT,channels=CHANNELS, rate=RATE, input=True, output = True,frames_per_buffer=CHUNK)

initiation = clientVideoSocket.recv(5).decode()

if initiation == "start":
    SendFrameThread = Thread(target=SendFrame).start()
    SendAudioThread = Thread(target=SendAudio).start()
    RecieveFrameThread = Thread(target=RecieveFrame).start()
    RecieveAudioThread = Thread(target=RecieveAudio).start()
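
For reference, the video framing above amounts to a 4-byte big-endian length prefix followed by the zlib-compressed frame. A condensed equivalent of that send/receive pattern, with made-up helper names send_msg, recv_exact and recv_msg, would look like this; sendall() already retries until everything is written, so the manual 5000 * CHUNK slicing isn't strictly required:

import struct

def send_msg(sock, payload):
    # One length-prefixed message; sendall() loops over partial writes itself.
    sock.sendall(struct.pack('!I', len(payload)) + payload)

def recv_exact(sock, size):
    # Loop until exactly `size` bytes have been read (recv may return less).
    buf = b''
    while len(buf) < size:
        chunk = sock.recv(size - len(buf))
        if not chunk:
            raise ConnectionError("socket closed mid-message")
        buf += chunk
    return buf

def recv_msg(sock):
    # Read the 4-byte big-endian length header, then the payload it announces.
    (length,) = struct.unpack('!I', recv_exact(sock, 4))
    return recv_exact(sock, length)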

Server-side code:

from socket import socket, AF_INET, SOCK_STREAM
from threading import Thread
import struct

HOST = input("Enter Host IP\n")
PORT_VIDEO = 3000
PORT_AUDIO = 4000
lnF = 640*480*3
CHUNK = 1024
BufferSize = 4096
addressesAudio = {}
addresses = {}
threads = {}

def ConnectionsVideo():
    # Accept video connections; once two clients are connected, send both a
    # "start" message and spawn a relay thread per client.
    while True:
        try:
            clientVideo, addr = serverVideo.accept()
            print("{} is connected!!".format(addr))
            addresses[clientVideo] = addr
            if len(addresses) > 1:
                for sockets in addresses:
                    if sockets not in threads:
                        threads[sockets] = True
                        sockets.send(("start").encode())
                        Thread(target=ClientConnectionVideo, args=(sockets, )).start()
            else:
                continue
        except:
            continue

def ConnectionsSound():
    # Accept audio connections and spawn a relay thread per client.
    while True:
        try:
            clientAudio, addr = serverAudio.accept()
            print("{} is connected!!".format(addr))
            addressesAudio[clientAudio] = addr
            Thread(target=ClientConnectionSound, args=(clientAudio, )).start()
        except:
            continue

def ClientConnectionVideo(clientVideo):
    # Read length-prefixed frames from one client; recvall() forwards the
    # bytes to the other client as they arrive.
    while True:
        try:
            lengthbuf = recvall(clientVideo, 4)
            length, = struct.unpack('!I', lengthbuf)
            recvall(clientVideo, length)
        except:
            continue

def ClientConnectionSound(clientAudio):
    # Forward raw audio chunks from one client to the other.
    while True:
        try:
            data = clientAudio.recv(BufferSize)
            broadcastSound(clientAudio, data)
        except:
            continue

def recvall(clientVideo, BufferSize):
        # Receive BufferSize bytes from one client, forwarding the data to the
        # other client as it arrives; when BufferSize == 4 (the length header)
        # the bytes are also returned so the caller can unpack them.
        databytes = b''
        i = 0
        while i != BufferSize:
            to_read = BufferSize - i
            if to_read > (1000 * CHUNK):
                databytes = clientVideo.recv(1000 * CHUNK)
                i += len(databytes)
                broadcastVideo(clientVideo, databytes)
            else:
                if BufferSize == 4:
                    databytes += clientVideo.recv(to_read)
                else:
                    databytes = clientVideo.recv(to_read)
                i += len(databytes)
                if BufferSize != 4:
                    broadcastVideo(clientVideo, databytes)
        # ~ print("YES!!!!!!!!!" if i == BufferSize else "NO!!!!!!!!!!!!")
        if BufferSize == 4:
            broadcastVideo(clientVideo, databytes)
            return databytes

def broadcastVideo(clientSocket, data_to_be_sent):
    for clientVideo in addresses:
        if clientVideo != clientSocket:
            clientVideo.sendall(data_to_be_sent)

def broadcastSound(clientSocket, data_to_be_sent):
    for clientAudio in addressesAudio:
        if clientAudio != clientSocket:
            clientAudio.sendall(data_to_be_sent)

serverVideo = socket(family=AF_INET, type=SOCK_STREAM)
try:
    serverVideo.bind((HOST, PORT_VIDEO))
except OSError:
    print("Server Busy")

serverAudio = socket(family=AF_INET, type=SOCK_STREAM)
try:
    serverAudio.bind((HOST, PORT_AUDIO))
except OSError:
    print("Server Busy")

serverAudio.listen(2)
print("Waiting for audio connection..")
AcceptThreadAudio = Thread(target=ConnectionsSound)
AcceptThreadAudio.start()


serverVideo.listen(2)
print("Waiting for video connection..")
AcceptThreadVideo = Thread(target=ConnectionsVideo)
AcceptThreadVideo.start()
AcceptThreadVideo.join()
serverVideo.close()

Both programs took me longer to put together than I would have liked, but while testing them I noticed that the audio and video are offset from each other by somewhere between 3 and 7 seconds. I've tried some troubleshooting, but apart from the code looking like it was thrown together by a kid, I can't find anything wrong with it. I could import a new Python package, but I'd prefer to stick with the packages I'm already using. Keep in mind that I'm planning to eventually turn both programs into separate .exe files so they're easy to send to people.
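
To illustrate what I mean by synchronizing: each audio chunk and video frame would need to carry some notion of its capture time, and playback of whichever stream is ahead would need to be held back to a common playout point. A rough sketch of that idea follows; none of these helpers exist in my code above, and TARGET_DELAY is just an arbitrary value:

import struct
import time

TARGET_DELAY = 0.5  # fixed playout delay in seconds (arbitrary tuning value)

def stamp_chunk(payload):
    # Prefix a chunk with its capture time as an 8-byte big-endian double.
    return struct.pack('!d', time.time()) + payload

def unstamp_chunk(data):
    # Split a stamped chunk back into (capture_time, payload).
    (capture_time,) = struct.unpack('!d', data[:8])
    return capture_time, data[8:]

class PlaybackScheduler:
    # Maps the sender's capture times onto the local clock plus a fixed delay,
    # so audio and video chunks sharing one scheduler stay aligned.
    def __init__(self):
        self.offset = None  # local_time - sender_time, taken from the first chunk

    def wait_until_due(self, capture_time):
        if self.offset is None:
            self.offset = time.time() - capture_time
        due = capture_time + self.offset + TARGET_DELAY
        delay = due - time.time()
        if delay > 0:
            time.sleep(delay)  # chunk arrived early: hold it back
        # if delay <= 0 the chunk is already late; play it now (or drop it)

Both receive loops would then call wait_until_due() right before stream.write() / cv2.imshow(), and anything that arrives too late could simply be played immediately or dropped.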

0 Answers:

There are no answers yet.