如何将Javascript数据存储在CSV文件中

时间:2016-12-30 09:59:43

标签: javascript php csv web-scraping

我使用 PhantomJS 库来获取数据:
/* PhantomJS script: load the product page and print the text of #ModelList. */
var TARGET_URL = 'https://www.apwagner.com/appliance-part/wpl/wp661600';
var webpage = require('webpage');
var page = webpage.create();

/*
 * Executed inside the loaded page (sandboxed by page.evaluate).
 * Returns the plain text of the #ModelList element; reading .innerHTML
 * instead would return the raw <ul>/<li> markup.
 */
function readModelListText() {
  return document.getElementById('ModelList').textContent;
}

/* Called by PhantomJS once the page load has finished (or failed). */
function onPageOpened(status) {
  if (status === 'success') {
    var ua = page.evaluate(readModelListText);
    console.log(ua);
  } else {
    console.log('Unable to access network');
  }
  /* Always terminate PhantomJS, whatever the outcome. */
  phantom.exit();
}

console.log('The default user agent is ' + page.settings.userAgent);
page.settings.userAgent = 'SpecialAgent';
page.open(TARGET_URL, onPageOpened);

输出

             1DNET3205TQ0
             7MMMS0100VW0
             7MMMS0100VW1
             7MMMS0120VM0
             7MMMS0140VW0
             7MMMS0160VW0

如果我改用 innerHTML 获取输出,则输出如下:

    <ul class="modelnos">
                        <li><a class="cursor" href="/appliance/1dnet3205tq0" onclick="return ProductService.SaveLogModelView('1DNET3205TQ0', 'MAC')"> 1DNET3205TQ0</a></li>
                        <li><a class="cursor" href="/appliance/7mmms0100vw0" onclick="return ProductService.SaveLogModelView('7MMMS0100VW0', 'MAC')"> 7MMMS0100VW0</a></li>
                        <li><a class="cursor" href="/appliance/7mmms0100vw1" onclick="return ProductService.SaveLogModelView('7MMMS0100VW1', 'MAC')"> 7MMMS0100VW1</a></li>
                        <li><a class="cursor" href="/appliance/7mmms0120vm0" onclick="return ProductService.SaveLogModelView('7MMMS0120VM0', 'MAC')"> 7MMMS0120VM0</a></li>
                        <li><a class="cursor" href="/appliance/7mmms0140vw0" onclick="return ProductService.SaveLogModelView('7MMMS0140VW0', 'MAC')"> 7MMMS0140VW0</a></li>
</ul>

但是作为变量的输出,我希望以数组格式输出。

例如 var models = ["1DNET3205TQ0", "7MMMS0100VW0", "7MMMS0100VW1"];

并将此数组放在csv文件中。

如何在数组中获取此数据并放入csv。

更新:

实际上我想从该数组中的每个值创建表格html。

例如每行 3 列的表格:

<table>
<tr><td> 1DNET3205TQ0 </td>
<td> 7MMMS0100VW0 </td>
<td> 7MMMS0100VW1 </td>
</tr>
<tr><td> 7MMMS0120VM0 </td>
<td> 7MMMS0140VW0 </td>
<td> 7MMMS0160VW0 </td>
</tr>
</table>

2 个答案:

答案 0 :(得分:0)

组合使用 split() 函数和 join() 函数可以获得正确的结果(有关详细说明,请参阅代码中的注释)。

注释以灰色显示,并用 /* */ 包围。

&#13;
&#13;
/*
 * PhantomJS script: scrape the model numbers listed in #ModelList and print
 * them as a single comma-separated (CSV) line.
 *
 * page.open() is asynchronous: its callback runs only after the page has
 * loaded, long after the top-level statements of this script have finished.
 * Everything that depends on the scraped text (split + join) therefore has
 * to happen inside the callback, otherwise it would operate on the still
 * empty array.
 */

/* array that receives the model numbers once the page has loaded */
var my_array = [];

/* CSV string built from my_array once the page has loaded */
var csv_array = "";

var page = require('webpage').create();
console.log('The default user agent is ' + page.settings.userAgent);
page.settings.userAgent = 'SpecialAgent';
page.open('https://www.apwagner.com/appliance-part/wpl/wp661600', function(status) {
  if (status !== 'success') {
    console.log('Unable to access network');
  } else {
    /* runs inside the page; yields '' when #ModelList is missing so the
       code below never calls a method on null */
    var ua = page.evaluate(function() {
      var list = document.getElementById('ModelList');
      return list ? list.textContent : '';
    }) || '';
    console.log(ua);

    /* trim first: the text starts and ends with whitespace, and splitting
       it untrimmed would produce empty first/last entries */
    var trimmed = ua.trim();

    /* split ua into an array on any run of whitespace */
    my_array = trimmed === '' ? [] : trimmed.split(/\s+/);

    /* join() turns the array into a 'csv' line; the ',' parameter is the
       character put in between the values */
    csv_array = my_array.join(",");
    console.log(csv_array);
  }
  phantom.exit();
});
&#13;
&#13;
&#13;

步骤回顾:

  1. 使用 var my_array = []; 创建一个用于存储值的数组变量
  2. 在回调函数中,使用 ua.split(/\s+/); 它通过正则表达式(regular expression)匹配空白字符(whitespace),将结果拆分为数组
  3. 拆分完成后,使用 my_array.join(","); 将所有值组合成一个字符串,每个值之间以 "," 分隔

编辑:

    要将结果放入表格中,不要使用 my_array.join(),而是使用 for 循环:每次循环时,通过 my_array[i] 取得数组中的值(其中 i 表示数组的索引),然后使用 document.createElement() 创建 table、tr、td 等元素,最后使用 appendChild() 将各元素插入到正确的父元素中以创建表格。

    在示例中,我添加了一个目标 div,表格将被添加到其中。

    &#13;
    &#13;
    /*
     * Builds an HTML table from my_array, three values per row, and appends
     * it to the element with id "table_destination".
     */

    /* original array from other snippet */
    var my_array = ["value 1", "value 2", "value 3"];

    /* number of cells per table row (the question asks for 3 columns) */
    var columns_per_row = 3;

    /* new table element */
    var new_table = document.createElement("table");

    /* row currently being filled; replaced every columns_per_row values */
    var new_row = null;

    /* for loop which sets i to the index of each value listed in my_array */
    for (var i = 0; i < my_array.length; i++) {

      /* start a new row at indexes 0, 3, 6, ... and add it to the table */
      if (i % columns_per_row === 0) {
        new_row = document.createElement("tr");
        new_table.appendChild(new_row);
      }

      /* create a cell for the current row */
      var new_cell = document.createElement("td");

      /* set the text in the created cell; textContent is the standard DOM
         property (innerText is not available in every browser) */
      new_cell.textContent = my_array[i];

      /* add the cell to the row */
      new_row.appendChild(new_cell);

    }
    /* destination div */
    var destination = document.getElementById("table_destination");

    /* add the table to the destination div */
    destination.appendChild(new_table);
    &#13;
    #table_destination > table,
    td {
      width: 50%;
      border: 1px solid black;
    }
    &#13;
    <div id="table_destination"></div>
    &#13;
    &#13;
    &#13;

答案 1 :(得分:0)

/*
 * PhantomJS script: scrape the model numbers listed in #ModelList and save
 * them as one comma-separated line in /home/data.csv.
 *
 * page.open() is asynchronous, so the file must be written from inside its
 * callback; writing at the top level would run before the page has loaded
 * and always produce an empty file.
 *
 * Note: this uses the PhantomJS 'fs' module (synchronous fs.write). The
 * Node.js counterpart would be fs.writeFile(path, data, callback).
 */
var fs = require('fs');
var array = [];
var page = require('webpage').create();
console.log('The default user agent is ' + page.settings.userAgent);
page.settings.userAgent = 'SpecialAgent';
page.open('https://www.apwagner.com/appliance-part/wpl/wp661600', function(status) {
    if (status !== 'success') {
        console.log('Unable to access network');
    } else {
        // Runs inside the page; yields '' when #ModelList is missing so the
        // code below never calls a method on null.
        var ua = page.evaluate(function() {
            var list = document.getElementById('ModelList');
            return list ? list.textContent : '';
        }) || '';
        console.log(ua);

        // Trim first: the text is padded with whitespace, and splitting it
        // untrimmed would yield empty first/last entries (stray commas).
        var text = ua.trim();
        array = text === '' ? [] : text.split(/\s+/);

        try {
            // 'w' = open for writing, replacing any existing content.
            fs.write('/home/data.csv', array.join(','), 'w');
            console.log("The file was saved!");
        } catch (err) {
            // Report the failure but still fall through to phantom.exit().
            console.log(err);
        }
    }
    phantom.exit();
});