用于查找具有属性但具有唯一顺序的标签的正则表达式

时间:2019-06-20 03:50:44

标签: php regex

我想用正则表达式从 HTML 中提取日期和交易,但不知道该怎么做。我试过 simple html,发现很难实现。我想做的是找到每笔交易的日期,把它们放入数组,然后以整齐的格式打印出来。我需要从这个 HTML 表格中取出某些值:日期所在的 HTML 是 <div style="width:100%;overflow:hidden;">,所有交易金额都在 <td align="right"> 中。HTML 见:https://pastebin.com/L8emba2X

我试过很多不同版本的正则表达式,但由于每个日期后面还要提取另外 4 个交易值,它们似乎都不起作用。

const axios = require('axios');

/**
 * Triggered from a message on a Cloud Pub/Sub topic.
 *
 * @param {!Object} event Event payload.
 * @param {!Object} context Metadata for the event.
 */
exports.processPubSubMessage = (event, context) => {
  const pubsubMessage = event.data;
  const dataString = Buffer.from(pubsubMessage, 'base64').toString();
  const message = JSON.parse(dataString);
  const commitSha = message.sourceProvenance.resolvedRepoSource.commitSha;
  const repoName = message.sourceProvenance.resolvedRepoSource.repoName;
  const [bitbucket, username, repo_slug] = repoName.split('_');
  // Build Bitbucket payload data.
  const payload = {
      type: 'string',
      created_on: message.createTime,
      description: `Status: ${message.status}`,
      key: 'string',
      name: 'Google Cloud Build',
      refname: `buildTriggerId: ${message.buildTriggerId}`,
      state: getBitbucketState(message.status),
      updated_on: message.finishTime,
      url: message.logUrl,
      uuid: message.id,
  }
  // Send request to Bitbucket.
  const token = process.env.BITBUCKET_TOKEN;
  const url = getBuildUrl(username, repo_slug, commitSha);
  axios.post(url, payload, {
      headers: { Authorization: `Basic ${token}` }
  })
      .then(function(response){
          console.log(response);
      })
      .catch(function(error){
          console.log(error);
      });


  /**
   * See: https://developer.atlassian.com/bitbucket/api/2/reference/resource/repositories/%7Busername%7D/%7Brepo_slug%7D/commit/%7Bnode%7D/statuses/build
   * 
   * @param {string} username
   * @param {string} repo_slug
   * @param {string} commitSha 
   */
  function getBuildUrl(username, repo_slug, commitSha) {
      const baseUrl = 'https://api.bitbucket.org/2.0/repositories';
      return `${baseUrl}/${username}/${repo_slug}/commit/${commitSha}/statuses/build`;;
  }

  /**
   * Translates states from Google Cloud Build Message to Bitbucket.
   * See: https://developer.atlassian.com/bitbucket/api/2/reference/resource/repositories/%7Busername%7D/%7Brepo_slug%7D/commit/%7Bnode%7D/statuses/build
   * 
   * @param {string} status 
   */
  function getBitbucketState(status) {
      switch(status.toLowerCase()) {
          case 'success':
              return 'SUCCESSFUL';
          case 'queued':
          case 'working':
              return 'INPROGRESS';
          default:
              return 'FAILED';
      }
  }
};

我用了这段代码,结果只输出了交易。另外,我不知道如何把日期和对应的交易一起打印出来。

1 个答案:

答案 0 :(得分:1)

我的猜测是,您可能想要一个类似于以下内容的表达式:

<\s*div style="width:100%;overflow:hidden;"\s*>([\s\S]*?)<\/div>|<td align="right">(.+?)<\/td>

其中 ([\s\S]*?) 用于捕获包括换行符在内的任意字符。

您的尝试很好,但 (.*?) 不会匹配换行符,而下面这些写法可以:

([\s\S]*?)
([\d\D]*?)
([\w\W]*?)

Demo

测试

// The pattern is an alternation of two pieces:
//   - the date <div>: capture group 1, [\s\S]*? lets it run across newlines;
//   - a right-aligned amount <td>: capture group 2, confined to one line.
$dateDiv = '<\s*div style="width:100%;overflow:hidden;"\s*>([\s\S]*?)<\/div>';
$amountCell = '<td align="right">(.+?)<\/td>';
$pattern = '/' . $dateDiv . '|' . $amountCell . '/m';

// Sample rows from the table; the last row is cut off inside its date <div>.
$html = '<td align="left"><div style="width:100%;overflow:hidden;">
    2019.06.04 09:35
</div></td><td>5176</td><td align="right">0.00</td><td align="right">0.00</td><td align="right">5,000.00</td><td align="right">5,000.00</td><td>орлого</td><td>                 </td>
</tr><tr>
<td align="left"><div style="width:100%;overflow:hidden;">
    2019.06.04 09:35
</div></td><td>5024</td><td align="right">5,000.00</td><td align="right">-50.00</td><td align="right">0.00</td><td align="right">4,950.00</td><td>Ухаалаг мэдээ үйлчилгээний хураамж</td><td>                 </td>
</tr><tr>
<td align="left"><div style="width:100%;overflow:hidden;">
    2019.06.14 11:00
        </tbody>
    </table>
</div>';

// PREG_SET_ORDER gives one entry per match in document order, so each date
// entry is followed by the amount entries of the same row.
$matches = [];
preg_match_all($pattern, $html, $matches, PREG_SET_ORDER);

// Dump every match together with its capture groups.
var_dump($matches);

输出

array(11) {
  [0]=>
  array(2) {
    [0]=>
    string(69) "<div style="width:100%;overflow:hidden;">
    2019.06.04 09:35
</div>"
    [1]=>
    string(22) "
    2019.06.04 09:35
"
  }
  [1]=>
  array(3) {
    [0]=>
    string(27) "<td align="right">0.00</td>"
    [1]=>
    string(0) ""
    [2]=>
    string(4) "0.00"
  }
  [2]=>
  array(3) {
    [0]=>
    string(27) "<td align="right">0.00</td>"
    [1]=>
    string(0) ""
    [2]=>
    string(4) "0.00"
  }
  [3]=>
  array(3) {
    [0]=>
    string(31) "<td align="right">5,000.00</td>"
    [1]=>
    string(0) ""
    [2]=>
    string(8) "5,000.00"
  }
  [4]=>
  array(3) {
    [0]=>
    string(31) "<td align="right">5,000.00</td>"
    [1]=>
    string(0) ""
    [2]=>
    string(8) "5,000.00"
  }
  [5]=>
  array(2) {
    [0]=>
    string(69) "<div style="width:100%;overflow:hidden;">
    2019.06.04 09:35
</div>"
    [1]=>
    string(22) "
    2019.06.04 09:35
"
  }
  [6]=>
  array(3) {
    [0]=>
    string(31) "<td align="right">5,000.00</td>"
    [1]=>
    string(0) ""
    [2]=>
    string(8) "5,000.00"
  }
  [7]=>
  array(3) {
    [0]=>
    string(29) "<td align="right">-50.00</td>"
    [1]=>
    string(0) ""
    [2]=>
    string(6) "-50.00"
  }
  [8]=>
  array(3) {
    [0]=>
    string(27) "<td align="right">0.00</td>"
    [1]=>
    string(0) ""
    [2]=>
    string(4) "0.00"
  }
  [9]=>
  array(3) {
    [0]=>
    string(31) "<td align="right">4,950.00</td>"
    [1]=>
    string(0) ""
    [2]=>
    string(8) "4,950.00"
  }
  [10]=>
  array(2) {
    [0]=>
    string(99) "<div style="width:100%;overflow:hidden;">
    2019.06.14 11:00
        </tbody>
    </table>
</div>"
    [1]=>
    string(52) "
    2019.06.14 11:00
        </tbody>
    </table>
"
  }
}