
时间:2013-08-19 11:17:00

标签: c# javascript html ajax pinterest



例如,使用此板; http://pinterest.com/dodo/web-designui-and-mobile/


<div class="variableHeightLayout padItems GridItems Module centeredWithinWrapper" style="..">
    <!-- First div with a displayed board image -->
    <div class="item" style="top: 0px; left: 0px; visibility: visible;">..</div>
    <!-- Last div with a displayed board image -->
    <div class="item" style="top: 3343px; left: 1000px; visibility: visible;">..</div>


<div class="variableHeightLayout padItems GridItems Module centeredWithinWrapper" style="..">
    <!-- First div with a displayed board image -->
    <div class="item" style="top: 12431px; left: 750px; visibility: visible;">..</div>
    <!-- Last div with a displayed board image -->
    <div class="item" style="top: 19944px; left: 750px; visibility: visible;">..</div>






using System;
using System.Net;
using HtmlAgilityPack;

/// <summary>
/// Downloads the Pinterest board page, parses it with HtmlAgilityPack and
/// collects the <c>src</c> attribute of every node matched by the XPath query.
/// </summary>
private void Main() {
    string pinterestURL = "http://www.pinterest.com/...";
    string XPath = ".../img";

    HtmlDocument doc = new HtmlDocument();

    // The document must actually be populated before it is queried; fetch the
    // raw markup and hand it to the parser.
    using (WebClient client = new WebClient()) {
        doc.LoadHtml(client.DownloadString(pinterestURL));
    }

    // Currently only downloads the first 25 images.

    // SelectNodes returns null (not an empty collection) when nothing matches,
    // so guard before iterating.
    HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(XPath);
    var image_links = new System.Collections.Generic.List<string>();
    if (nodes != null) {
        foreach (HtmlNode link in nodes) {
            string src = link.GetAttributeValue("src", null);
            if (!string.IsNullOrEmpty(src)) {
                image_links.Add(src);
            }
        }
    }
    // Use image links
}

5 个答案:

答案 0 :(得分:2)



  1. 这是PHP,而不是C#(但你说你对任何服务器端语言感兴趣)。
  2. 此代码调用的是(非官方的)Pinterest搜索端点。您需要修改$data和$search_res,使其对应适合您任务的端点(例如BoardFeedResource)。注意:至少对于搜索,Pinterest目前使用两个端点,一个用于初始页面加载,另一个用于无限滚动操作。每个端点都有各自预期的参数结构。
  3. Pinterest没有正式的公共API,因此只要他们改动任何东西,这段代码就可能在没有任何警告的情况下失效。
  4. 您可能会发现pinterestapi.co.uk更容易实施,并且可以接受您正在做的事情。
  5. 我在类下面留了一些演示/调试代码,一旦您能拿到想要的数据,就应该把它们删除;另外还有一个您可能想要修改的默认页面获取上限。
  6. 兴趣点:

    1. 下划线_参数采用JavaScript格式的时间戳,即类似Unix时间,但精确到毫秒。它实际上并不用于分页。
    2. 分页使用bookmarks属性:首次请求发往“新”端点,此时不需要bookmarks;然后从返回结果中取出bookmarks,并在下一次请求中带上它,以获取结果的下一“页”;再从这些结果中取出bookmarks去获取再下一页,依此类推,直到结果用完或达到预设上限(或者触及服务器的最大脚本执行时间)。我很想知道bookmarks字段究竟编码了什么内容。我猜除了pin ID或其他页面标记之外,里面还藏着一些有趣的秘密。
    3. 我跳过了html,转而处理JSON,因为这比使用DOM操作方案或一堆正则表达式更容易(对我而言)。
    4. <?php
      if(!class_exists('Skrivener_Pins')) {
        class Skrivener_Pins {
           * Constructor
          public function __construct() {
           * Pinterest search function. Uses Pinterest's "internal" page APIs, so likely to break if they change.
           * @author [@skrivener] Philip Tillsley
           * @param $search_str     The string used to search for matching pins.
           * @param $limit          Max number of pages to get, defaults to 2 to avoid excessively large queries. Use care when passing in a value.
           * @param $bookmarks_str  Used internally for recursive fetches.
           * @param $pages          Used internally to limit recursion.
           * @return array()        int['id'], obj['image'], str['pin_link'], str['orig_link'], bool['video_flag']
           * TODO:
          public function get_tagged_pins($search_str, $limit = 1, $bookmarks_str = null, $page = 1) {
            // limit depth of recursion, ie. number of pages of 25 returned, otherwise we can hang on huge queries
            if( $page > $limit ) return false;
            // are we getting a next page of pins or not
            $next_page = false;
            if( isset($bookmarks_str) ) $next_page = true;
            // build url components
            if( !$next_page ) {
              // 1st time
              $search_res = 'BaseSearchResource'; // end point
              $path = '&module_path=' . urlencode('SearchInfoBar(query=' . $search_str . ', scope=boards)');
              $data = preg_replace("'[\n\r\s\t]'","",'{
                  "query":"' . $search_str . '"
                    "query":"' . $search_str . '"
            } else {
              // this is a fetch for 'scrolling', what changes is the bookmarks reference, 
              // so pass the previous bookmarks value to this function and it is included
              // in query
              $search_res = 'SearchResource'; // different end point from 1st time search
              $path = '';
              $data = preg_replace("'[\n\r\s\t]'","",'{
                  "query":"' . $search_str . '",
                  "bookmarks":["' . $bookmarks_str . '"],
            $data = urlencode($data);
            $timestamp = time() * 1000; // unix time but in JS format (ie. has ms vs normal server time in secs), * 1000 to add ms (ie. 0ms)
            // build url
            $url = 'http://pinterest.com/resource/' . $search_res . '/get/?source_url=/search/pins/?q=' . $search_str
                . '&data=' . $data
                . $path
                . '&_=' . $timestamp;//'1378150472669';
            // setup curl
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_HTTPHEADER, array("X-Requested-With: XMLHttpRequest"));
            // get result
            $curl_result = curl_exec ($ch); // this echoes the output
            $curl_result = json_decode($curl_result);
            curl_close ($ch);
            // clear html to make var_dumps easier to see when debugging
            // $curl_result->module->html = '';
            // isolate the pin data, different end points have different data structures
            if(!$next_page) $pin_array = $curl_result->module->tree->children[1]->children[0]->children[0]->children;
            else $pin_array = $curl_result->module->tree->children;
            // map the pin data into desired format
            $pin_data_array = array();
            $bookmarks = null;
            if(is_array($pin_array)) {
              if(count($pin_array)) {
                foreach ($pin_array as $pin) {
                  //setup data
                  $image_id = $pin->options->pin_id;
                  $image_data = ( isset($pin->data->images->originals) ) ? $pin->data->images->originals : $pin->data->images->orig;
                  $pin_url = 'http://pinterest.com/pin/' . $image_id . '/';
                  $original_url = $pin->data->link;
                  $video = $pin->data->is_video;
                  array_push($pin_data_array, array(
                    "id"          => $image_id,
                    "image"       => $image_data,
                    "pin_link"    => $pin_url,
                    "orig_link"   => $original_url,
                    "video_flag"  => $video,
                $bookmarks = reset($curl_result->module->tree->resource->options->bookmarks);
              } else {
                $pin_data_array = false;
            // recurse until we're done
            if( !($pin_data_array === false) && !is_null($bookmarks) ) {
              // more pins to get
              $more_pins = $this->get_tagged_pins($search_str, $limit, $bookmarks, ++$page);
              if( !($more_pins === false) ) $pin_data_array = array_merge($pin_data_array, $more_pins);
              return $pin_data_array;
            // end of recursion
            return false;
        } // end class Skrivener_Pins
      } // end if
       /* Debug/Demo Code
        * delete or comment this section for production
        */
      // output headers to control how the content displays
      // header("Content-Type: application/json");
      header("Content-Type: text/plain");
      // header("Content-Type: text/html");
      // define search term
      // $tag = "vader";
      $tag = "haemolytic";
      // $tag = "qjkjgjerbjjkrekhjk";
      if(class_exists('Skrivener_Pins')) {
        // instantiate the class
        $pin_handler = new Skrivener_Pins();
        // get pins, pinterest returns 25 per batch, function pages through this recursively, pass in limit to 
        // override default limit on number of pages to retrieve, avoid high limits (eg. limit of 20 * 25 pins/page = 500 pins to pull 
        // and 20 separate calls to Pinterest)
        $pins1 = $pin_handler->get_tagged_pins($tag, 2);
        // display the pins for demo purposes
        echo '<h1>Images on Pinterest mentioning "' . $tag . '"</h1>' . "\n";
        // loose comparison on purpose: both false and an empty array count as "nothing found"
        if( $pins1 != false ) {
          echo '<p><em>' . count($pins1) . ' images found.</em></p>' . "\n";
          // only the last 5 entries are rendered (see skrivener_dump_images)
          skrivener_dump_images($pins1, 5);
        } else {
          echo '<p><em>No images found.</em></p>' . "\n";
        }
      }
      /**
       * Demo function: dumps the images in an array of pins as html img tags.
       *
       * @param mixed    $pin_array Array of pins, each with an 'image' object exposing
       *                            url, width and height. Non-array input prints nothing.
       * @param int|bool $limit     When truthy, only the LAST $limit pins are printed
       *                            (array_slice is called with a negative offset).
       */
      function skrivener_dump_images($pin_array, $limit = false) {
        if(is_array($pin_array)) {
          if($limit) $pin_array = array_slice($pin_array, -($limit));
          foreach ($pin_array as $pin) {
            echo '<img src="' . $pin['image']->url . '" width="' . $pin['image']->width . '" height="' . $pin['image']->height . '" >' . "\n";
          }
        }
      }


答案 1 :(得分:1)






"options": {
    "board_id": "158400180582875562",
    "access": [],
    "bookmarks": [
"context": {
    "app_version": "fb43cdb"
"module": {
    "name": "GridItems",
    "options": {
        "scrollable": true,
        "show_grid_footer": true,
        "centered": true,
        "reflow_all": true,
        "virtualize": true,
        "item_options": {
            "show_rich_title": false,
            "squish_giraffe_pins": false,
            "show_board": false,
            "show_via": false,
            "show_pinner": false,
            "show_pinned_from": true
        "layout": "variable_height"
"append": true,
"error_strategy": 1


    "options": {
        "board_id": "158400180582875562",
        "access": [],
        "bookmarks": [
    "context": {
        "app_version": "fb43cdb"
    "module": {
        "name": "GridItems",
        "options": {
            "scrollable": true,
            "show_grid_footer": true,
            "centered": true,
            "reflow_all": true,
            "virtualize": true,
            "item_options": {
                "show_rich_title": false,
                "squish_giraffe_pins": false,
                "show_board": false,
                "show_via": false,
                "show_pinner": false,
                "show_pinned_from": true
            "layout": "variable_height"
    "append": true,
    "error_strategy": 2

正如您所看到的,变化不大。board_id是一样的,error_strategy现在是2,而末尾的&_参数不同。

&_参数是这里的关键。我敢打赌,它告诉页面下一组照片从哪里开始。我在任何响应或原始页面HTML中都找不到对它的引用,但它一定存在于某处,或者是在客户端通过javascript生成的。无论哪种方式,页面/浏览器都必须知道下一步该请求什么,所以这个信息您应该是能够拿到的。

答案 2 :(得分:1)



# Accumulate every pin of the target board: keep requesting batches from
# board_feed until it hands back an empty one.
board_pins = []
while True:
    pin_batch = pinterest.board_feed(board_id=target_board['id'], board_url=target_board['url'])
    if len(pin_batch) == 0:
        break
    board_pins += pin_batch


# Walk the collected pins and pull out each one's image URL.
for pin in board_pins:
    url = pin['image']
    # process image url..

完整代码示例: https://github.com/bstoilov/py3-pinterest/blob/master/download_board_images.py


答案 3 :(得分:0)



curl -H "X-Requested-With:XMLHttpRequest" "http://pinterest.com/resource/CategoryFeedResource/get/?source_url=%2Fall%2Fgeek%2F&data=%7B%22options%22%3A%7B%22feed%22%3A%22geek%22%2C%22scope%22%3Anull%2C%22bookmarks%22%3A%5B%22Pz8xMzc3NjU4MjEyLjc0Xy0xfDE1ZjczYzc4YzNlNDg3M2YyNDQ4NGU1ZTczMmM0ZTQyYzBjMWFiMWNhYjRhMDRhYjg2MTYwMGVkNWQ0ZDg1MTY%3D%22%5D%2C%22is_category_feed%22%3Atrue%7D%2C%22context%22%3A%7B%22app_version%22%3A%22addc92b%22%7D%2C%22module%22%3A%7B%22name%22%3A%22GridItems%22%2C%22options%22%3A%7B%22scrollable%22%3Atrue%2C%22show_grid_footer%22%3Atrue%2C%22centered%22%3Atrue%2C%22reflow_all%22%3Atrue%2C%22virtualize%22%3Atrue%2C%22item_options%22%3A%7B%22show_pinner%22%3Atrue%2C%22show_pinned_from%22%3Afalse%2C%22show_board%22%3Atrue%2C%22show_via%22%3Afalse%7D%2C%22layout%22%3A%22variable_height%22%7D%7D%2C%22append%22%3Atrue%2C%22error_strategy%22%3A2%7D&module_path=App()%3EHeader()%3EDropdownButton()%3EDropdown()%3ECategoriesMenu(resource%3D%5Bobject+Object%5D%2C+name%3DCategoriesMenu%2C+resource%3DCategoriesResource(browsable%3Dtrue))&_=1377658213300" | python -mjson.tool



答案 4 :(得分:0)

#!/usr/bin/env bash 
## File: getpins.bsh 
## Copyrighted by +A.M.Danischewski  2016+ (c)
## This program may be reutilized without limits, provided this 
## notice remain intact. 

## If this breaks one day, then just fire up firefox Developer Tools and check the network traffic to 
## capture "copy as curl" of the calls to the search page (filter with BaseSearchResource), then the 
## call to feed more data (filter with SearchResource). 
## Do a search on whatever you want remove the cookie header, and add -o ret2.html -D h2.txt -c c1.txt, 
## then search replace the search terms as SEARCHTOKEN1 and SEARCHTOKEN2. 
## Description this script facilitates alternate browsers, by caching images/pins 
## from pinterest. This script is hardwired for two search terms. First create a directory 
## to where you want the images to go, then cd there. 
##  Usage: 
##    $> cd /big/drive/auto_gyros 
##    $> getpins.bsh "sleek autogyros"
## Expect around 900 images to land wherever you select, so make sure you have space! =) 

declare -r ORIG_IMGS="pin_orig_imgs.txt"
declare -r TMP_IMGS="pin_imgs.txt"
declare -r UA_HEADER="User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:19.$(($RANDOM%10))) Gecko/20100101 Firefox/19.0"

 ## Say Hello to the main page and get a cookie. 
declare PINCMD1=$(cat << EOF
curl -o ret1.html -D h1.txt -c c1.txt -H 'Host: www.pinterest.com' -H '${UA_HEADER}' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'Connection: keep-alive' 'https://www.pinterest.com/'
 ## Start a search for our dear search terms. 
declare PINCMD2=$(cat << EOF
curl -H 'X-APP-VERSION: ea7a93a' -o ret2.html -D h2.txt -c c1.txt -H 'Host: www.pinterest.com' -H '${UA_HEADER}' -H 'Accept: application/json, text/javascript, */*; q=0.01' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'X-Pinterest-AppState: active' -H 'X-NEW-APP: 1'  -H 'X-Requested-With: XMLHttpRequest' -H 'Referer: https://www.pinterest.com' -H 'Connection: keep-alive' 'https://www.pinterest.com/resource/BaseSearchResource/get/?source_url=%2Fsearch%2Fpins%2F%3Fq%3DSEARCHTOKEN1%2520SEARCHTOKEN2%26rs%3Dtyped%260%3DSEARCHTOKEN1%257Ctyped%261%3DSEARCHTOKEN2%257Ctyped&data=%7B%22options%22%3A%7B%22restrict%22%3Anull%2C%22scope%22%3A%22pins%22%2C%22constraint_string%22%3Anull%2C%22show_scope_selector%22%3Atrue%2C%22query%22%3A%22SEARCHTOKEN1+SEARCHTOKEN2%22%7D%2C%22context%22%3A%7B%7D%2C%22module%22%3A%7B%22name%22%3A%22SearchPage%22%2C%22options%22%3A%7B%22restrict%22%3Anull%2C%22scope%22%3A%22pins%22%2C%22constraint_string%22%3Anull%2C%22show_scope_selector%22%3Atrue%2C%22query%22%3A%22SEARCHTOKEN1+SEARCHTOKEN2%22%7D%7D%2C%22render_type%22%3A1%2C%22error_strategy%22%3A0%7D&module_path=App%3EHeader%3ESearchForm%3ETypeaheadField(support_guided_search%3Dtrue%2C+resource_name%3DAdvancedTypeaheadResource%2C+tags%3Dautocomplete%2C+class_name%3DbuttonOnRight%2C+prefetch_on_focus%3Dtrue%2C+support_advanced_typeahead%3Dnull%2C+hide_tokens_on_focus%3Dundefined%2C+search_on_focus%3Dtrue%2C+placeholder%3DSearch%2C+show_remove_all%3Dtrue%2C+enable_recent_queries%3Dtrue%2C+name%3Dq%2C+view_type%3Dguided%2C+value%3D%22%22%2C+input_log_element_type%3D227%2C+populate_on_result_highlight%3Dtrue%2C+search_delay%3D0%2C+is_multiobject_search%3Dtrue%2C+type%3Dtokenized%2C+enable_overlay%3Dtrue)&_=1454779874891' 
 ## Load further images. 
declare PINCMD3=$(cat << EOF
curl -H 'X-APP-VERSION: ea7a93a' -D h3.txt -c c1.txt -H 'Host: www.pinterest.com' -H '${UA_HEADER}' -H 'Accept: application/json, text/javascript, */*; q=0.01' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'X-Pinterest-AppState: active' -H 'X-NEW-APP: 1'  -H 'X-Requested-With: XMLHttpRequest' -H 'Referer: https://www.pinterest.com' -H 'Connection: keep-alive' 'https://www.pinterest.com/resource/SearchResource/get/?source_url=%2Fsearch%2Fpins%2F%3Fq%3DSEARCHTOKEN1%2520SEARCHTOKEN2%26rs%3Dtyped%260%3DSEARCHTOKEN1%257Ctyped%261%3DSEARCHTOKEN2%257Ctyped&data=%7B%22options%22%3A%7B%22layout%22%3Anull%2C%22places%22%3Afalse%2C%22constraint_string%22%3Anull%2C%22show_scope_selector%22%3Atrue%2C%22query%22%3A%22SEARCHTOKEN1+SEARCHTOKEN2%22%2C%22scope%22%3A%22pins%22%2C%22bookmarks%22%3A%5B%22_NEW_BOOK_MARK_%22%5D%7D%2C%22context%22%3A%7B%7D%7D&module_path=App%3EHeader%3ESearchForm%3ETypeaheadField(support_guided_search%3Dtrue%2C+resource_name%3DAdvancedTypeaheadResource%2C+tags%3Dautocomplete%2C+class_name%3DbuttonOnRight%2C+prefetch_on_focus%3Dtrue%2C+support_advanced_typeahead%3Dnull%2C+hide_tokens_on_focus%3Dundefined%2C+search_on_focus%3Dtrue%2C+placeholder%3DSearch%2C+show_remove_all%3Dtrue%2C+enable_recent_queries%3Dtrue%2C+name%3Dq%2C+view_type%3Dguided%2C+value%3D%22%22%2C+input_log_element_type%3D227%2C+populate_on_result_highlight%3Dtrue%2C+search_delay%3D0%2C+is_multiobject_search%3Dtrue%2C+type%3Dtokenized%2C+enable_overlay%3Dtrue)&_=1454779874911'
 ## Exactly 2 search terms in a single string are expected, you can hack it up if 
 ## you want something else.  
declare SEARCHTOKEN1=$(echo "${1}" | cut -d " " -f1)
declare SEARCHTOKEN2=$(echo "${1}" | cut -d " " -f2)


# Print, one per line and without quotes, every value of a `"url": "..."` pair
# in file $1 that matches the pinimg.com .jpg pattern below.
function lspinimgs() {
  local src_file="${1}"
  grep -o "\"url\": \"http[s]*://[^\"]*.pinimg.com[^\"]*.jpg\"" "${src_file}" \
    | cut -d " " -f2 \
    | tr -d "\""
}
# Rewrite each image URL listed in file $1 so that the first path segment after
# "com/" becomes "originals", and write the result to file $2.
function mkpinorig() {
  local in_list="${1}"
  local out_list="${2}"
  sed "s#\(^http.*\)\(com/\)\([^/]*\)\(/.*jpg\$\)#\1\2originals\4#g" "${in_list}" > "${out_list}"
}
# Print the distinct bookmark tokens found in file $1 (the text between `["`
# and `"]` of each `bookmarks": [...]` occurrence), dropping the "-end-"
# sentinel.
function getpinbm() {
  # The sentinel pattern starts with a dash, so it must be passed via -e;
  # a bare "-end-" is parsed by grep as the option cluster `-e nd-`, which
  # would also discard any real bookmark that happens to contain "nd-".
  grep -o "bookmarks\": [^ ]* " "${1}" \
    | sed "s/^book.*\[\"//g;s/\"\].*\$//g" \
    | sort -u \
    | grep -v -e "-end-"
}
# Replace the bookmark embedded in the global PINCMD3 (the text between
# `bookmarks%22%3A%5B%22` and `%22%5D`) with the bookmark given as $1.
function changepinbm() {
  local bm_escaped
  # The bookmark is spliced into a sed replacement: escape backslash, the `/`
  # delimiter and `&` (whole-match reference) so the token is inserted verbatim
  # instead of breaking or corrupting the substitution.
  bm_escaped=$(printf '%s' "${1}" | sed 's#[\\/&]#\\&#g')
  PINCMD3=$(sed "s/\(^.*\)\(bookmarks%22%3A%5B%22\)\(.*\)\(%22%5D.*\$\)/\1\2${bm_escaped}\4/g" <<< "${PINCMD3}")
}
# Remove the working files from the current directory: ret*html, c1.txt,
# h1.txt..h3.txt, plus the two image-list files named by TMP_IMGS and ORIG_IMGS.
function cleanup() {
  local -a stale=(ret*html c1.txt "${TMP_IMGS}" h{1..3}.txt "${ORIG_IMGS}")
  rm "${stale[@]}"
}

# Run the whole fetch: evaluate PINCMD1 and PINCMD2, then keep evaluating
# PINCMD3 with the bookmark taken from the previous response until no bookmark
# is left; finally extract the image URLs from every saved .html file, rewrite
# them to their "originals" form and download each distinct one with wget.
# NOTE(review): nothing visible in this file invokes main — confirm the call
# was not lost together with the loop/function terminators restored here.
function main() {
  eval "${PINCMD1}"
  eval "${PINCMD2}"
  # ret2.html is written by PINCMD2 (-o ret2.html); each further response is
  # saved as ret<i>.html and supplies the bookmark for the next iteration.
  for ((i=3,lasti=2; i<10000; i++,lasti++)); do
    pinbm=$(getpinbm "ret${lasti}.html")
    [[ -z "${pinbm}" ]] && break
    changepinbm "${pinbm}"
    eval "${PINCMD3}" > "ret${i}.html"
  done
  for a in *.html; do lspinimgs "${a}" >> "${TMP_IMGS}"; done
  mkpinorig "${TMP_IMGS}" "${ORIG_IMGS}"
  # IFS is set to newline/backspace so the URL list is split per line only.
  IFS=$(echo -en "\n\b") && for a in $(sort "${ORIG_IMGS}" | uniq); do
    wget --tries=3 -E -e robots=off -nc --random-wait --content-disposition --no-check-certificate -p --restrict-file-names=windows,lowercase,ascii --header "${UA_HEADER}" -nd "$a"
  done
}

exit 0