将 JSON 中的特定属性提取到新的 JSON 中

时间:2018-12-06 10:56:29

标签: javascript json scrape

我有一个wordpress网站,并且我有一个API,可以将帖子导出为Json。我正在用此新闻建立一个新网站,但是我必须获取这个Json文件,并以某种方式将其格式化为带有我的属性名称的新Json文件。我也想从中丢弃一些属性。因此,这是我从帖子中得到的一个Json的简单示例:

return

如何制作脚本,例如以以下格式输出此json:

{
  "status": "ok",
  "post": {
    "id": 2335,
    "type": "post",
    "slug": "litoral-awards14-no-jornal-diario-porto-canal",
    "url": "https:\/\/litoralmagazine.com\/litoral-awards14-no-jornal-diario-porto-canal\/",
    "status": "publish",
    "title": "Litoral Awards’14 no Jornal Di\u00e1rio do Porto Canal",
    "title_plain": "Litoral Awards’14 no Jornal Di\u00e1rio do Porto Canal",
    "content": "<div id=\"tps_slideContainer_2335\" class=\"theiaPostSlider_slides\"><div>\n\n\n\n<\/div><\/div><div class=\"theiaPostSlider_footer _footer\"><\/div><p><!-- END THEIA POST SLIDER --><\/p>\n\n                <script type='text\/javascript'>\n                    \n                var tpsInstance;\n                var tpsOptions = {\"slideContainer\":\"#tps_slideContainer_2335\",\"nav\":[\".theiaPostSlider_nav\"],\"navText\":\"%{currentSlide} de %{totalSlides}\",\"helperText\":\"\",\"defaultSlide\":0,\"transitionEffect\":\"slide\",\"transitionSpeed\":400,\"keyboardShortcuts\":true,\"scrollAfterRefresh\":true,\"numberOfSlides\":1,\"slides\":[],\"useSlideSources\":true,\"themeType\":\"font\",\"prevText\":\"Anterior\",\"nextText\":\"Seguinte\",\"buttonWidth\":\"0\",\"buttonWidth_post\":\"0\",\"postUrl\":\"https:\\\/\\\/litoralmagazine.com\\\/litoral-awards14-no-jornal-diario-porto-canal\\\/\",\"postId\":2335,\"refreshAds\":false,\"refreshAdsEveryNSlides\":\"1\",\"adRefreshingMechanism\":\"javascript\",\"siteUrl\":\"https:\\\/\\\/litoralmagazine.com\",\"loopSlides\":false,\"scrollTopOffset\":\"0\",\"prevFontIcon\":\"<span aria-hidden=\\\"true\\\" class=\\\"tps-icon-chevron-circle-left\\\"><\\\/span>\",\"nextFontIcon\":\"<span aria-hidden=\\\"true\\\" class=\\\"tps-icon-chevron-circle-right\\\"><\\\/span>\"};\n            \n                    (function ($) {\n                        $(document).ready(function () {\n                            \n                            tpsInstance = new tps.createSlideshow(tpsOptions);\n                        });\n                    }(jQuery));\n                <\/script>\n            ",
    "excerpt": "",
    "date": "2014-12-13 12:02:05",
    "modified": "2016-05-18 09:31:00",
    "categories": [
      {
        "id": 299,
        "slug": "litoral-awards",
        "title": "Litoral Awards",
        "description": "",
        "parent": 0,
        "post_count": 91
      },
      {
        "id": 342,
        "slug": "clipping-2014",
        "title": "clipping-2014",
        "description": "",
        "parent": 0,
        "post_count": 3
      },
      {
        "id": 573,
        "slug": "litoral-awards-2014",
        "title": "Litoral Awards 2014",
        "description": "",
        "parent": 0,
        "post_count": 21
      }
    ],
    "tags": [
      {
        "id": 82,
        "slug": "featured2",
        "title": "Featured2",
        "description": "",
        "post_count": 10
      },
      {
        "id": 312,
        "slug": "litoral-awards-2015",
        "title": "litoral awards 2015",
        "description": "",
        "post_count": 19
      }
    ],
    "author": {
      "id": 4,
      "slug": "litoral-magazine",
      "name": "Litoral Magazine",
      "first_name": "Litoral",
      "last_name": "Magazine",
      "nickname": "Litoral Magazine",
      "url": "https:\/\/litoralmagazine.com",
      "description": ""
    },
    "comments": [],
    "attachments": [
      {
        "id": 2336,
        "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014.jpg",
        "slug": "porto-canal-jornal-diario-litoral-awards-2014",
        "title": "porto-canal-jornal-diario-litoral-awards-2014",
        "description": "",
        "caption": "",
        "parent": 2335,
        "mime_type": "image\/jpeg",
        "images": {
          "full": {
            "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014.jpg",
            "width": 1000,
            "height": 600
          },
          "thumbnail": {
            "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014-150x150.jpg",
            "width": 150,
            "height": 150
          },
          "medium": {
            "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014-300x180.jpg",
            "width": 300,
            "height": 180
          },
          "medium_large": {
            "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014.jpg",
            "width": 1000,
            "height": 600
          },
          "post-thumbnail": {
            "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014.jpg",
            "width": 1000,
            "height": 600
          },
          "post-thumb": {
            "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014.jpg",
            "width": 1000,
            "height": 600
          },
          "medium-thumb": {
            "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014-400x240.jpg",
            "width": 400,
            "height": 240
          },
          "small-thumb": {
            "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014-95x60.jpg",
            "width": 95,
            "height": 60
          }
        }
      }
    ],
    "comment_count": 0,
    "comment_status": "closed",
    "thumbnail": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014.jpg",
    "custom_fields": {
      "tps_options": [
        "a:1:{s:7:\"enabled\";s:6:\"global\";}",
        "a:1:{s:7:\"enabled\";s:6:\"global\";}",
        "a:1:{s:7:\"enabled\";s:6:\"global\";}"
      ],
      "mvp_photo_credit": [
        "Nulla pariatur excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia. Photo by Shutterstock."
      ],
      "mvp_post_template": [
        "def-wide"
      ],
      "mvp_featured_image": [
        "show"
      ],
      "post_views_count": [
        "998"
      ],
      "mvp_video_embed": [
        "<iframe width=\"560\" height=\"315\" src=\"https:\/\/www.youtube.com\/embed\/BG5RVursEGQ?list=PLtUECMbIwb1X1NJEIU9pAbWkzBWMmg6Hj\" frameborder=\"0\" allowfullscreen><\/iframe>"
      ],
      "dpsp_networks_shares": [
        "a:0:{}"
      ]
    },
    "thumbnail_size": "post-thumbnail",
    "thumbnail_images": {
      "full": {
        "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014.jpg",
        "width": 1000,
        "height": 600
      },
      "thumbnail": {
        "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014-150x150.jpg",
        "width": 150,
        "height": 150
      },
      "medium": {
        "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014-300x180.jpg",
        "width": 300,
        "height": 180
      },
      "medium_large": {
        "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014.jpg",
        "width": 1000,
        "height": 600
      },
      "post-thumbnail": {
        "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014.jpg",
        "width": 1000,
        "height": 600
      },
      "post-thumb": {
        "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014.jpg",
        "width": 1000,
        "height": 600
      },
      "medium-thumb": {
        "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014-400x240.jpg",
        "width": 400,
        "height": 240
      },
      "small-thumb": {
        "url": "https:\/\/litoralmagazine.com\/wp-content\/uploads\/2016\/01\/porto-canal-jornal-diario-litoral-awards-2014-95x60.jpg",
        "width": 95,
        "height": 60
      }
    }
  },
  "previous_url": "https:\/\/litoralmagazine.com\/litoral-awards14-no-canal-central\/",
  "next_url": "https:\/\/litoralmagazine.com\/premio-musica-sons-em-transito\/"
}

例如,我想说:originalJson.status = newJson.status。 originalJson.author.id = newJson.author。 originalJson.thumbnail.url = newJson.images.thumbnail_image。希望你明白我的意思。 您还可以在我得到的json上,在originalJson.post.content上看到我有很多html,例如:

{
  "status": "ok",
  "slug": "litoral-com-teste-noticia-url",
  "title": "Titulo de teste",
  "type": "post",
  "content": "Lorem",
  "published": 1,
  "excerpt": "",
  "categories": [1, 2, 4],
  "tags": [1, 2, 4],
  "author": 1,
  "comments": {
    "user_comment": 1,
    "body_comment": "comentario de teste"
  },
  "images": {
    "thumbnail_image": "http://wwww.imagem.com.jpeg",
    "featured_image": "http://wwww.imagem.com.jpeg"
  }
}

将此信息传递到新的json时,削减所有div的最佳方法是什么?我只想要div内的内容。我该如何实现?

我尝试搜索过这个问题,但没有找到答案。基本上,我该如何编写一个脚本(例如 JS 脚本),以旧的 JSON 文件作为输入,输出一个新的 JSON 文件,其中只包含我想要的属性,并且按照我上面解释的方式重命名其中的大多数属性。

感谢任何帮助!

3 个答案:

答案 0 :(得分:1)

对于第一个问题,假设您不想动态指定想要的字段,可以通过执行以下操作使它看起来更简洁:

// Build the new object by copying only the wanted fields from the old one.
// Declared with `const` so the assignment does not create an implicit global
// (an undeclared assignment is a ReferenceError in strict mode / ES modules).
const newJson = {
    status: oldJson.status,
    author: oldJson.author,
};

第二个问题: 我能想到的最接近删除div的方法是:

  • 使用 replace 方法将 <div> 和 </div> 替换为空字符串
  • 遍历字符串,找到 <div 的索引以及其后下一个 > 的索引,然后用 substring 截取这两个索引之间的字符串

但是,如果我处于您的位置,我将首先尝试寻找其他方法来获取html数据,而不是字符串操作。

答案 1 :(得分:1)

如果您有有效的html字符串,则可以使用此函数提取文本:

/**
 * Parse an HTML string and return the text of the first element matching a selector.
 *
 * The markup is loaded into a detached <div>, so nothing is added to the page.
 *
 * @param {string} selector - CSS selector of the element whose text is wanted.
 * @param {string} html - HTML markup to search in.
 * @returns {string} The `innerText` of the first matching element.
 * @throws {TypeError} If no element in `html` matches `selector`.
 */
function extractText(selector, html) {
  const container = Object.assign(document.createElement('div'), {
    innerHTML: html,
  });

  // querySelector yields null when nothing matches; destructuring null throws.
  const { innerText } = container.querySelector(selector);
  return innerText;
}

您可以像这样使用它:

const caption = extractText('p.wp-caption-text', "<div id=\"tps_nav_u...");
// now caption will be "Ribau Esteves – Presidente da Câmara Municipal de Aveiro"

请记住,如果选择器与 html 中的任何元素都不匹配,此函数将抛出异常。您需要对这种情况进行相应的处理。

答案 2 :(得分:0)

尝试:

// Sample API response, trimmed to the fields used below. Keys and string
// values must use plain ASCII double quotes and the original English key
// names, otherwise the literal does not parse and the lookups below fail.
const stringText = {
  "status": "ok",
  "post": {
    "id": 2335,
    "categories": [
      {
        "id": 299,
        "slug": "litoral-awards",
        "title": "Litoral Awards",
        "description": "",
        "parent": 0,
        "post_count": 91
      },
    ],
  },
  "previous_url": "https://litoralmagazine.com/litoral-awards14-no-canal-central/",
  "next_url": "https://litoralmagazine.com/premio-musica-sons-em-transito/"
};

// Pull the top-level fields out of the response object.
const { status, post, previous_url, next_url } = stringText;
console.log('object :', post.categories[0]);