使用xpath访问<script>标记内的javascript值

时间:2019-09-08 18:45:09

标签: javascript xpath scrapy

来自此标签:

<script type = "text/javascript" > dataLayer.push({
    // One object describing this page is pushed onto the global `dataLayer` array.
    // NOTE(review): presumably the tag-manager e-commerce data layer - confirm against the site.
    "pageType": "productPage", "ecommerce": {
        "currencyCode": "EUR",
        // "detail" describes the product shown on this page; "products" is an array
        // holding a single object, so it must be indexed ([0]) before reading its keys.
        "detail": {
            "actionField": {"list": "Detail", "action": "detail"},
            "products": [{
                "name": "Desodorante Spray Alien",
                "id": "10483558",
                // Here the price is a string with a decimal comma, unlike the
                // dot-separated price strings used in "impressions" below.
                "price": "34,95",
                "brand": "MUGLER",
                "category": "higiene\/desodorantes",
                "variant": "100 ML",
                "dimension5": "Mucho stock",
                "dimension6": "Unisex",
                "dimension7": "CLARINS SPAIN, S.A",
                "dimension8": "No",
                "dimension9": "",
                "metric2": 1
            }]
        },
        // "impressions" is an array of other products (all with "list" set to
        // "you may want"), one object per item, with "position" counting from 1.
        "impressions": [{
            "name": "Angel men recarga Eau de Toilette",
            "id": "10059432",
            "price": "47.95",
            "brand": "MUGLER",
            "category": "perfumes_hombre_edt",
            "variant": "100 ML ",
            "list": "you may want",
            "position": 1
        }, {
            "name": "Angel men rubber flask Eau de Toilette",
            "id": "10351154",
            "price": "42.95",
            "brand": "MUGLER",
            "category": "perfumes_hombre_edt",
            "variant": "50 ML ",
            "list": "you may want",
            "position": 2
        }, {
            "name": "Alien Shower Milk",
            "id": "10483565",
            "price": "26.00",
            "brand": "MUGLER",
            "category": "higiene_geles",
            "variant": "200 ML ",
            "list": "you may want",
            "position": 3
        }, {
            "name": "Amen Desodorante en Stick",
            "id": "10532706",
            "price": "21.95",
            "brand": "MUGLER",
            "category": "hombre_desodorantes",
            "variant": "75 ML ",
            "list": "you may want",
            "position": 4
        }]
    }
});
// Appends a callback to window["rrApiOnReady"], creating the array first if it
// does not exist yet. The callback posts the product group and then reports a
// group view for the same id.
// NOTE(review): presumably the queue is drained once the Retail Rocket API has loaded - confirm.
(window["rrApiOnReady"] = window["rrApiOnReady"] || []).push(function () {
    retailrocket.productsGroup.post({
        // Numeric here, whereas the dataLayer entry above stores the same digits as a string.
        "groupId": 10483558,
        "name": "Desodorante Spray Alien",
        "price": 34.95,
        "pictureUrl": "https://ima.douglas.es/img/1467/desodorante_spray_alien-0-.png",
        "url": "https://douglas.es/p/mugler/desodorante_spray_alien",
        "isAvailable": true,
        "categoryPaths": ["Higiene/Corporal", "Corporal", "Corporal/Higiene", "Higiene", "Higiene/Desodorante", "Marca/Mugler"],
        "description": "El elixir de feminidad y de sensualidad del Eau de Parfum Alien en su versión desodorante en spray. Déjate envolver con los mismos acordes de la fragancia.",
        "vendor": "MUGLER",
        // Unlike the dataLayer "products" array, this is an object keyed by the
        // product id (as a string).
        "products": {
            "10483558": {
                "isAvailable": true,
                "name": "Desodorante Spray Alien",
                "size": "100",
                "url": "https://douglas.es/p/mugler/desodorante_spray_alien",
                "pictureUrl": "https://ima.douglas.es/img/1467/desodorante_spray_alien-0-.png",
                "price": 34.95,
                "oldPrice": 34.95,
                "params": {}
            }
        },
        "params": {"medida": "ML", "subTitle": "Todo tipo de piel"},
        "model": "Desodorante Spray Alien",
        "typePrefix": "higiene_desodorantes",
        "oldPrice": 34.95
    });
    rrApi.groupView([10483558]);
});
// Page-level values assigned on the global App.page object.
App.page.webshop = "DOU";
App.page.warehouse = ["ALM"];
App.page.codPostal = "";
</script>

我需要访问其中一些特定的值,用于不同的功能(products、products.brand、impressions 和 impressions.id)。

我试图将其转换为 JSON 字典,但 "products" 是一个列表,被转换成了数组,因此无法直接通过 "name" 键来访问。

我该怎么做?

1 个答案:

答案 0 :(得分:0)

一种做法是利用只出现在该脚本中的某个唯一关键字来选中这个 <script> 标签。拿到脚本文本之后,就可以使用 re_first 提取所需的值。例如:

        # Select the text of the <script> element that contains the keyword
        # 'productPage', which appears only in the target script.
        product_css = "script:contains('productPage')::text"
        # Non-greedy capture of the quoted string value that follows a "name" key.
        regex = 'name": "(.+?)"'
        # Use the selector defined above (the original referenced an undefined
        # `product_count_css`); re_first yields only the first match.
        product_name = response.css(product_css).re_first(regex)

这将提取第一个匹配到的 name 值(即产品名称)。您可以使用 re 并调整正则表达式,对其进行进一步调整以获取其他值。祝你好运。