使用 Puppeteer,我能够导航到某个视频的 src URL,并且 MP4 可以正常播放(使用的是自定义版本的 Chromium)。
现在:我希望能够获取正在播放的视频数据,并将其发送到 Node.js 中的某种缓冲区,该缓冲区可以另存为文件、通过 WebSocket 发送给客户端,或作为 HTTP 响应发送等等……但我不确定该怎么做,目前我能做到的只是让视频播放。
我不能仅把 URL 发送给 Node.js,因为要观看这个视频文件,必须先经历整个 Puppeteer 爬取过程(它不是一个静态 URL,而是依赖于该浏览器会话,因此只有 Puppeteer 控制的浏览器才能查看它)。
所以:我该怎样才能把 src URL 指向的视频取到 Node.js 中的文件(或缓冲区)里?这是我当前的代码,如果有帮助的话:
var puppeteer = require("puppeteer-core");
var http=require("https");
var fs=require("fs");
var fetch=require("fetch-node");
/**
 * Downloads `url` to `destPath` over HTTPS using the browser session of `page`.
 *
 * The video URL is only served to the session that produced it, so a bare
 * `https.get(url)` from Node is presumably rejected or redirected. This helper
 * replays the page's cookies and user agent on the request, follows redirects,
 * and resolves only after the file has been completely written.
 *
 * @param {object} page - Puppeteer page whose cookies authenticate the request.
 * @param {string} url - Absolute HTTPS URL of the resource to download.
 * @param {string} userAgent - User-Agent header value to send (same as the page's).
 * @param {string} destPath - File path the response body is written to.
 * @param {number} [redirectsLeft=5] - Remaining redirects that may be followed.
 * @returns {Promise<void>} Resolves once the file is fully flushed to disk.
 * @throws {Error} On network errors, non-2xx responses, or too many redirects.
 */
async function downloadWithSession(page, url, userAgent, destPath, redirectsLeft = 5) {
  // Cookies are looked up per URL so that a redirect to another host only
  // receives the cookies the browser itself would send to that host.
  const cookies = await page.cookies(url);
  const headers = { "User-Agent": userAgent };
  if (cookies.length > 0) {
    headers.Cookie = cookies.map((cookie) => `${cookie.name}=${cookie.value}`).join("; ");
  }

  // `http` is the file-level binding for Node's `https` module.
  const response = await new Promise((resolve, reject) => {
    http.get(url, { headers }, resolve).on("error", reject);
  });
  const status = response.statusCode;

  if (status >= 300 && status < 400 && response.headers.location) {
    response.resume(); // discard the redirect body so the socket is released
    if (redirectsLeft <= 0) {
      throw new Error(`Too many redirects while downloading ${url}`);
    }
    const nextUrl = new URL(response.headers.location, url).toString();
    return downloadWithSession(page, nextUrl, userAgent, destPath, redirectsLeft - 1);
  }

  if (status < 200 || status >= 300) {
    response.resume();
    throw new Error(`Download failed with HTTP status ${status} for ${url}`);
  }

  await new Promise((resolve, reject) => {
    const file = fs.createWriteStream(destPath);
    file.on("error", reject);
    file.on("finish", resolve);
    response.on("error", (err) => {
      file.destroy();
      reject(err);
    });
    response.pipe(file);
  });
}

/**
 * Main script: opens the Google Drive preview page, starts playback, reads the
 * <video> element's source URL from the player iframe, saves the video to
 * "down.mp4" using the browser session's cookies, and finally dumps the HTML of
 * the video URL page to "outputagain.txt". Screenshots of each step are written
 * to "shots/" (the directory is expected to exist already).
 */
(async () => {
  const browser = await puppeteer.launch({
    executablePath: "./cobchrome/chrome.exe"
  });
  try {
    console.log("Got browser", browser);
    const page = await browser.newPage();
    console.log(page, "got page");

    const agentStr = `Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0`;
    await page.setUserAgent(agentStr);
    console.log(agentStr, "Set the user agent");

    await page.goto("https://docs.google.com/file/d/1Cyuh41yNfYZU_zL-MHLf_EPJCYnlT7oJ/preview?enablejsapi=1&playerapiid=player4");
    console.log("went to page..");

    // Use a public CDP session instead of the private `page._client` field.
    const cdp = await page.target().createCDPSession();
    await cdp.send('Page.setDownloadBehavior', {behavior: 'allow', downloadPath: './downloadscob/'});
    await page.screenshot({path: "shots/onopen.png"});

    // NOTE(review): the script waits for one selector but clicks another;
    // kept as in the original — confirm this is intentional.
    const clickEl = ".ndfHFb-c4YZDc-aTv5jf-bVEB4e-RJLb9c";
    const newClickID = ".ndfHFb-c4YZDc-aTv5jf-NziyQe-LgbsSe";
    const clicker = await page.waitForSelector(newClickID);
    console.log(clicker, "got clicker");
    await page.screenshot({path: "shots/ongotclicker.png"});
    await page.click(clickEl);
    console.log("clicked");
    await page.screenshot({path: "shots/onclicked.png"});

    const frame = await page.waitForSelector("iframe[id=drive-viewer-video-player-object-0]");
    console.log(frame, "got video frame");
    await page.screenshot({path: "shots/ongotframe.png"});

    const cf = await frame.contentFrame();
    if (!cf) {
      throw new Error("The video player iframe has no content frame");
    }
    await page.screenshot({path: "shots/oncf.png"});
    console.log(cf, "got content frame");

    // waitForSelector both waits for the <video> element and returns its handle.
    const video = await cf.waitForSelector("video");
    await page.screenshot({path: "shots/videoappeared.png"});
    await page.screenshot({path: "shots/selectedvideo.png"});

    // Read only the URL; copying every DOM property back to Node drags
    // non-serializable values (nodes, functions) through evaluate().
    const src = await cf.evaluate((v) => v.src || v.currentSrc, video);
    if (!src) {
      throw new Error("The <video> element has no source URL");
    }
    await page.screenshot({path: "shots/evalled_vido.png"});
    console.log("$$###VIDEO SOURCE::", "time it took", src);

    console.log("starting to stream");
    await downloadWithSession(page, src, agentStr, "down.mp4");
    console.log("finished pipin");

    await page.goto(src);
    await page.screenshot({path: "shots/wentToNewPage.png"});
    await page.screenshot({path: "shots/maybeItsPlayingNow.png"});
    console.log("ABOUT t oFETHC wit H SOURCE", src);

    // page.content() only yields the HTML wrapper of the page, not the video bytes.
    const content = await page.content();
    await fs.promises.writeFile("outputagain.txt", content);
    console.log("saved it?");
    console.log(content);
  } finally {
    // Always release the Chromium process, even when a step above fails.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
目前,最后的 page.content() 只是获取页面的 HTML 内容,并没有任何二进制数据……