如何仅获取页面上我感兴趣的HTML?

时间:2019-09-09 01:32:42

标签: javascript html css tampermonkey

各位好,我正在调整一个音乐网站的页面设计,以便在自己演出时使用。为此,我写了一个放在 Tampermonkey 里运行的小脚本。

我需要想办法只获取歌曲标题、作者和所有歌词段落。我该怎么做?

示例歌曲:Lyrics

<script>
// Tampermonkey userscript body: once the page has loaded, strip everything
// except the song header and the lyrics, then restyle what is left.
window.onload = () => {
    // Remove `node` when it exists. A missing element is skipped so that one
    // absent selector does not abort the rest of the clean-up.
    const removeIfPresent = (node) => {
        if (node) {
            node.remove();
        }
    };

    // Apply inline styles to the first element matching `selector`, if any.
    const restyle = (selector, styles) => {
        const node = document.querySelector(selector);
        if (node) {
            Object.assign(node.style, styles);
        }
    };

    // Removal of page elements (same order as before).
    removeIfPresent(document.getElementsByClassName("main-header g-mb")[0]); // Header
    [
        '#breadcrumb', // Index
        '.cnt-space-top', // Advertising
        '.cnt-head > a', // Artist image
        '.cnt-info_exib', // Info above the video
        '.cnt-info', // Info above the video
        '.letra-menu', // Info beside the lyrics
        '.pl_media', // Music video
        '.letra-info', // Info under the lyrics
        '.letra-share', // Info under the lyrics
        '#comments', // Comments section
        '#cnt_footer', // Most listened section
    ].forEach((selector) => removeIfPresent(document.querySelector(selector)));

    // NOTE(review): getElementsByClassName returns a live collection, so the
    // indices shift after the first removal and the second call removes what
    // was originally the fourth ".g-1" element. Behaviour kept as it was;
    // confirm that this is the intended pair of footers.
    const footers = document.getElementsByClassName("g-1");
    removeIfPresent(footers[1]); // Music footer
    removeIfPresent(footers[2]); // Music footer

    // Music header
    restyle(".cnt-head", { margin: "auto", position: "relative" });

    // Name of the song
    restyle("h1", { fontSize: "45px", fontWeight: "bold" });

    // Music artist
    restyle("h2 > a", { padding: "0px", fontSize: "30px", fontWeight: "bold" });

    // Lyrics
    restyle('.cnt-letra', { padding: "0px", fontSize: "40px", fontWeight: "bold" });

    // Centre the lyrics and add side margins to the whole page.
    restyle('body', { textAlign: "center", margin: "0px 100px" });
}; // This closing brace was missing, which made the whole script a syntax error.
</script>

1 个答案:

答案 0 :(得分:0)

如果您使用的是支持 DOMParser 的 IE(>= 9),就可以像下面这样简单地读取并解析页面。
在 Chrome 和 Firefox 中,您会遇到 CORS 问题。不过,同样的逻辑也可以用在 Tampermonkey 脚本中:歌曲名称位于两个 H1 标签中的一个(代码中取最后一个),乐队名称位于第一个 H2 中,歌词则位于被同一个 DIV 包裹的 P 标签中。也可以在 node.js 中用类似的方式实现,这样不会有 CORS 问题,还可以访问文件等。

<script>
// Fetch the example lyrics page. loadPage is a function declaration, so it is
// hoisted and can be called before its definition below.
loadPage("https://www.letras.mus.br/queen/64295/");
/**
 * Download a lyrics page and replace the current document with a minimal
 * version of it: the last H1 (song title), the first H2 flattened to plain
 * text (artist) and the inner HTML of the first P element's parent (lyrics).
 *
 * Failures (network error, non-2xx status, unexpected page structure) are
 * reported with console.error and leave the current document untouched.
 *
 * @param {string} url - Address of the page to download.
 * @returns {void}
 */
function loadPage(url) {
    var http = new XMLHttpRequest();
    http.open('GET', url, true);

    // `load` only fires when a response arrived; `loadend` also fires after
    // errors and aborts, where responseText is empty and parsing would throw.
    http.onload = function () {
        if (this.status < 200 || this.status >= 300) {
            console.error('loadPage: request for ' + url + ' failed with status ' + this.status);
            return;
        }

        var root = new DOMParser().parseFromString(this.responseText, "text/html").documentElement;
        var headings = root.getElementsByTagName("H1");
        var title = headings[headings.length - 1];
        var artist = root.getElementsByTagName("H2")[0];
        var firstParagraph = root.getElementsByTagName("P")[0];
        if (!title || !artist || !firstParagraph) {
            console.error('loadPage: ' + url + ' does not contain the expected H1/H2/P elements');
            return;
        }

        // Flatten the artist heading to plain text. Going through textContent
        // keeps the text from being re-parsed as markup.
        artist.textContent = artist.textContent;

        var interesting = title.outerHTML + "\n" +
            artist.outerHTML + "\n" +
            firstParagraph.parentElement.innerHTML;

        document.write(
    "<style>\nbody { background-color:transparent; font-family:Arial, sans-serif; font-size:19px; opacity:1; word-wrap:break-word; margin:0px; }\n" +
    "h1 { color: rgb(255, 102, 0); font-size: 25px; font-weight: 700; letter-spacing: -1px; margin:0px; }\n" +
    "h2,a { color:rgb(183, 183, 0); font-size: 19px; font-weight: 700; letter-spacing: -1px; text-decoration: none; margin:0px; }\n" +
    "p { color: rgb(68, 68, 68); font-weight: 400; line-height: 30.4px; margin-bottom: 30.4px; }\n</style>\n"
        + interesting);
        document.close();
    };

    http.onerror = function () {
        console.error('loadPage: network error while requesting ' + url);
    };

    http.send();
}
</script>

下面是 node.js 版本,用于把您需要的内容保存到文件:

const request = require('request');
const jsdom = require("jsdom");
var fs = require('fs');

// Download and save the example page. loader returns a promise, so a failure
// is reported here instead of surfacing as an unhandled promise rejection.
loader("https://www.letras.mus.br/queen/64295/").catch(function (err) {
    console.error(err);
    process.exitCode = 1;
});

/**
 * Parse a downloaded lyrics page and build a minimal, self-contained HTML
 * fragment from it.
 *
 * The song title is taken from the last H1, the band from the first H2
 * (flattened to trimmed plain text) and the lyrics from the inner HTML of the
 * first P element's parent.
 *
 * @param {string} body - Raw HTML of the downloaded page.
 * @returns {Array<string>} Three items: the song title text, the band name,
 *     and the generated HTML (style block + title + band + lyrics).
 * @throws {Error} When the page has no H1, no H2 or no P element.
 */
function processDOM(body) {
    var dom = new jsdom.JSDOM(body);
    var html = dom.window.document;

    // Song name: the last H1 on the page.
    var headings = html.querySelectorAll("H1");
    var titleEl = headings[headings.length - 1];
    if (!titleEl) {
        throw new Error("processDOM: no H1 (song title) found in page");
    }
    var titleHtml = titleEl.outerHTML;

    // Band: the first H2.
    var bandEl = html.querySelector("H2");
    if (!bandEl) {
        throw new Error("processDOM: no H2 (band) found in page");
    }

    // Lyrics: everything inside the parent of the first P.
    var firstParagraph = html.querySelector("P");
    if (!firstParagraph || !firstParagraph.parentElement) {
        throw new Error("processDOM: no P (lyrics) found in page");
    }

    var items = [titleEl.textContent, bandEl.textContent.trim()];

    // Replace the heading's children with the plain band name. textContent
    // keeps the name from being re-parsed as markup.
    bandEl.textContent = items[1];

    var interesting = "<style>\nbody { background-color:transparent; font-family:Arial, sans-serif; font-size:19px; opacity:1; word-wrap:break-word; margin:0px; }\n" +
    "h1 { color: rgb(255, 102, 0); font-size: 25px; font-weight: 700; letter-spacing: -1px; margin:0px; }\n" +
    "h2,a { color:rgb(183, 183, 0); font-size: 19px; font-weight: 700; letter-spacing: -1px; text-decoration: none; margin:0px; }\n" +
    "p { color: rgb(68, 68, 68); font-weight: 400; line-height: 30.4px; margin-bottom: 30.4px; }\n</style>\n" +
    titleHtml + "\n" +
    bandEl.outerHTML + "\n";

    // Add line breaks after <br> and </p> so the saved file is readable.
    var lyrics = firstParagraph.parentElement.innerHTML;
    interesting += lyrics.replace(/<br>/g, '<br>\n').replace(/<\/p>/g, '</p>\n');

    items.push(interesting);
    return items;
}

// Based on https://stackoverflow.com/questions/38428027/why-await-is-not-working-for-node-request-module#38428075
/**
 * Download a lyrics page and save its simplified version next to the script
 * as "<song_name>.htm".
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<void>} Resolves once the file has been written; rejects
 *     when the download, the parsing or the write fails.
 */
async function loader(url) {
    var res = await doRequest(url);

    // song_name.htm: whitespace becomes "_", and so do path separators and
    // other characters that are not allowed in file names (e.g. "AC/DC").
    var pageName = res[0].replace(/\s/g, '_').replace(/[\\/:*?"<>|]/g, '_') + ".htm";

    // Wrap the callback API so a write error rejects this function's promise
    // instead of being thrown from inside the callback.
    await new Promise(function (resolve, reject) {
        fs.writeFile(pageName, res[2], function (err) { // html data
            if (err) {
                reject(err);
            } else {
                resolve();
            }
        });
    });
    console.log(pageName + ' saved.');
}

/**
 * Download `url` and run the response body through processDOM.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<Array<string>>} Resolves with processDOM's result. Rejects
 *     with the transport error, with an Error naming the HTTP status when the
 *     response is not 200, or with whatever processDOM throws.
 */
function doRequest(url) {
    return new Promise(function (resolve, reject) {
        request(url, function (error, res, body) {
            if (error) {
                reject(error);
                return;
            }
            if (res.statusCode !== 200) {
                // Previously this path rejected with `null`, which gave the
                // caller nothing to report.
                reject(new Error('Request for ' + url + ' failed with HTTP status ' + res.statusCode));
                return;
            }
            try {
                resolve(processDOM(body));
            } catch (err) {
                reject(err);
            }
        });
    });
}

附赠一个小工具——元素计算样式(computed style)打印器:它会创建一个同类型的空元素,获取该空元素的计算样式作为默认值,然后只打印页面上目标元素与默认值不同的样式。

// Element computed-style printer: ask for an element, compare its computed
// style with that of a freshly created element of the same name, and build a
// CSS-like rule containing only the properties whose values differ.
var el=prompt('Element:',"body"), defaultStyles, computedStyles;
var target = el ? document.querySelector(el) : null;
if (!target) {
    // prompt() returns null when cancelled and querySelector() returns null
    // when nothing matches; fail with a clear message instead of an opaque
    // TypeError from getComputedStyle(null).
    throw new Error('No element found for "' + el + '"');
}
defaultStyles = getStyles({}, window.getComputedStyle(document.createElement(el)));
computedStyles = el + ' ' + JSON.stringify(getStyles(defaultStyles, window.getComputedStyle(target)), null, 3)
    // The trailing comma is optional so that the last property, which
    // JSON.stringify emits without a comma, is converted as well.
    .replace(/"([^"]+)": "([^"]+)",?/g, "$1: $2;").replace(/\n/g, "\r\n");
/**
 * Collect the properties of a computed style whose values differ from a set
 * of default values.
 *
 * @param {Object} defaultStyles - Map of property name to default value; pass
 *     an empty object to collect every property.
 * @param {CSSStyleDeclaration} computedStyles - Style to read: an indexed
 *     list of property names with `length` and `getPropertyValue(name)`.
 * @returns {Object} Map of property name to value for every property whose
 *     value is not equal to the one in `defaultStyles`.
 */
function getStyles(defaultStyles, computedStyles) {
    var content = {};
    for (var i = 0; i < computedStyles.length; i++) {
        // Declared locally: these used to leak as implicit globals, which
        // throws a ReferenceError in strict mode and in ES modules.
        const cssProperty = computedStyles[i];
        const cssValue = computedStyles.getPropertyValue(cssProperty);
        if (defaultStyles[cssProperty] !== cssValue) {
            content[cssProperty] = cssValue;
        }
    }
    return content;
}
// Print the generated rule (the properties that differ from the defaults).
console.log(computedStyles)
// Alternative output: show the result in a prompt so it can be copied.
//prompt('Styles: ', computedStyles);

// Offer the snippet above as a one-line `javascript:` bookmarklet; the prompt
// is only used as a convenient way to copy the text.
prompt("Copy bookmarklet:", 'javascript:var el=prompt("Element:","body"), defaultStyles, computedStyles;defaultStyles = getStyles({}, window.getComputedStyle(document.createElement(el)));computedStyles = el + " " + JSON.stringify(getStyles(defaultStyles, window.getComputedStyle(document.querySelector(el))), null, 3).replace(/\\"([^\\"]+)\\": \\"([^\\"]+)\\",/g,"$1: $2;").replace(/\\n/g, "\\r\\n");function getStyles(defaultStyles, computedStyles) {var content = {};for (var i=0; i<computedStyles.length; i++) {cssProperty = computedStyles[i];cssValue = computedStyles.getPropertyValue(cssProperty);if(defaultStyles[cssProperty] != cssValue)content[cssProperty] = cssValue;}return content;}prompt("Styles: ", computedStyles),undefined')