使用Nokogiri解析html时出现小于字符的错误

时间:2014-04-14 15:30:52

标签: html ruby nokogiri

我在Ruby中使用Nokogiri解析一个html链接(html link)的内容。内容中包含“<3”这样的字符串,Nokogiri把其中的“<”当成了一个标签的开始,所以我无法获取完整的内容。该怎么解决呢?谢谢!

2 个答案:

答案 0 :(得分:1)

页面内容不是html,而是json。

使用JSON.parse解析它。

require 'json' # JSON is not loaded by default; without it JSON.parse raises NameError

# Parse the JSON text held in `content` into plain Ruby objects.
obj = JSON.parse(content)

答案 1 :(得分:1)

下面演示如何使用HTTParty:只需创建一个类,并为每个端点定义一个访问方法,例如search。

require 'httparty'

# Small client for the Foursquare v2 REST API, built on HTTParty.
#
# Requests are issued through the class-level HTTParty helpers, so the value
# returned by #search is whatever HTTParty's `get` returns.
class FourSquare
  include HTTParty
  # The scheme is spelled out on purpose: HTTParty falls back to plain http://
  # when a base URI has no scheme, which would send the OAuth token unencrypted.
  base_uri 'https://api.foursquare.com/v2'

  # NOTE(review): this is a credential committed in source. It is kept only as
  # the default so that `FourSquare.new` keeps working; prefer passing a token.
  DEFAULT_TOKEN = "VJZUMY5KPVGFNSNQTKZEG5BXTNZAZA51NPQM20UBXVB5OHMO".freeze

  # @param token [String] OAuth token sent with every request
  def initialize(token = DEFAULT_TOKEN)
    @token = token
  end

  # Performs a GET on "/<what>/search" with the location query parameters.
  #
  # @param what [Symbol, String] resource name interpolated into the path
  # @param radius [Integer] value sent as the `radius` query parameter
  # @param lat_long [String] "lat,lng" pair sent as the `ll` query parameter
  # @return the result of HTTParty's `get` for this request
  def search(what=:venues, radius=50, lat_long="21.01127405231557,105.86252250209158")
    options = { ll: lat_long, intent: "browse", radius: radius, oauth_token: @token, v: "20140414" }
    self.class.get("/#{what}/search", query: options)
  end
end

像这样使用它:

# Create the client and run a search with the default arguments.
fs = FourSquare.new
result = fs.search # optional arguments may be passed here: what, radius, lat_long

现在result中保存着响应,并且已经被解析为Ruby哈希:

{"meta"=>{"code"=>200},
 "notifications"=>[{"type"=>"notificationTray", "item"=>{"unreadCount"=>0}}],
 "response"=>
  {"venues"=>
    [{"id"=>"512ae67ee4b0861e65cc6d1d",
      "name"=>"tại gia",
      "contact"=>{},
      "location"=>
       {"lat"=>21.01130249367748,
        "lng"=>105.86250172574675,
        "distance"=>3,
        "cc"=>"VN",
        "country"=>"Vietnam"},
      "categories"=>[],
      "verified"=>false,
      "stats"=>{"checkinsCount"=>2, "usersCount"=>2, "tipCount"=>0},
      "specials"=>{"count"=>0, "items"=>[]},
      "hereNow"=>{"count"=>0, "summary"=>"0 people here", "groups"=>[]},
      "referralId"=>"v-1397697176"},
     {"id"=>"4f191ed9e4b08b384dcc274c",
      "name"=>"Chợ Đuổi Nguyễn Cao",
      "contact"=>{},
      "location"=>
       {"address"=>"Nguyễn Cao",
        "crossStreet"=>"Yersin",
        "lat"=>21.01157363534264,
        "lng"=>105.86230101953188,
        "distance"=>40,
        "cc"=>"VN",
        "city"=>"Hà Nội",
        "state"=>"Thành Phố Hà Nội",
        "country"=>"Việt Nam"},
      "categories"=>
       [{"id"=>"50be8ee891d4fa8dcc7199a7",
         "name"=>"Market",
         "pluralName"=>"Markets",
         "shortName"=>"Market",
         "icon"=>
          {"prefix"=>"https://ss1.4sqi.net/img/categories_v2/shops/default_",
           "suffix"=>".png"},
         "primary"=>true}],
      "verified"=>false,
      "stats"=>{"checkinsCount"=>24, "usersCount"=>9, "tipCount"=>0},
      "specials"=>{"count"=>0, "items"=>[]},
      "hereNow"=>{"count"=>0, "summary"=>"0 people here", "groups"=>[]},
      "referralId"=>"v-1397697176"},
     {"id"=>"4f310be1e4b0f99d87e0ebff",
      "name"=>"Ngõ 158 Phường Bạch Đằng",
      "contact"=>{},
      "location"=>
       {"isFuzzed"=>true,
        "lat"=>21.012500145519407,
        "lng"=>105.85838729146847,
        "distance"=>450,
        "cc"=>"VN",
        "city"=>"Hà Nội",
        "state"=>"Thành Phố Hà Nội",
        "country"=>"Việt Nam"},
      "categories"=>
       [{"id"=>"4bf58dd8d48988d103941735",
         "name"=>"Home (private)",
         "pluralName"=>"Homes (private)",
         "shortName"=>"Home",
         "icon"=>
          {"prefix"=>"https://ss1.4sqi.net/img/categories_v2/building/home_",
           "suffix"=>".png"},
         "primary"=>true}],
      "verified"=>false,
      "stats"=>{"checkinsCount"=>19, "usersCount"=>2, "tipCount"=>0},
      "specials"=>{"count"=>0, "items"=>[]},
      "hereNow"=>{"count"=>0, "summary"=>"0 people here", "groups"=>[]},
      "referralId"=>"v-1397697176"},
     {"id"=>"505b08bde4b016e61b081acc",
      "name"=>"trước gương - ở nhà <3",
      "contact"=>{},
      "location"=>
       {"lat"=>21.0113,
        "lng"=>105.8625,
        "distance"=>3,
        "cc"=>"VN",
        "country"=>"Vietnam"},
      "categories"=>[],
      "verified"=>false,
      "stats"=>{"checkinsCount"=>2, "usersCount"=>1, "tipCount"=>0},
      "specials"=>{"count"=>0, "items"=>[]},
      "hereNow"=>{"count"=>0, "summary"=>"0 people here", "groups"=>[]},
      "referralId"=>"v-1397697176"},
     {"id"=>"4e1878a71f6eb955987a0ef4",
      "name"=>"CMIV's house",
      "contact"=>{},
      "location"=>
       {"isFuzzed"=>true,
        "lat"=>21.012500145519407,
        "lng"=>105.85838729146847,
        "distance"=>450,
        "cc"=>"VN",
        "city"=>"Hà Nội",
        "state"=>"Thành Phố Hà Nội",
        "country"=>"Việt Nam"},
      "categories"=>
       [{"id"=>"4bf58dd8d48988d103941735",
         "name"=>"Home (private)",
         "pluralName"=>"Homes (private)",
         "shortName"=>"Home",
         "icon"=>
          {"prefix"=>"https://ss1.4sqi.net/img/categories_v2/building/home_",
           "suffix"=>".png"},
         "primary"=>true}],
      "verified"=>false,
      "stats"=>{"checkinsCount"=>1, "usersCount"=>1, "tipCount"=>0},
      "specials"=>{"count"=>0, "items"=>[]},
      "hereNow"=>{"count"=>0, "summary"=>"0 people here", "groups"=>[]},
      "referralId"=>"v-1397697176"},
     {"id"=>"4f58d391e4b0cd6f8d2f5c44",
      "name"=>"Cty TNHH PR&Pr Việt Nam",
      "contact"=>{},
      "location"=>
       {"isFuzzed"=>true,
        "lat"=>21.012500145519407,
        "lng"=>105.85838729146847,
        "distance"=>450,
        "cc"=>"VN",
        "city"=>"Cầu Giấy",
        "state"=>"Thành Phố Hà Nội",
        "country"=>"Việt Nam"},
      "categories"=>
       [{"id"=>"4bf58dd8d48988d103941735",
         "name"=>"Home (private)",
         "pluralName"=>"Homes (private)",
         "shortName"=>"Home",
         "icon"=>
          {"prefix"=>"https://ss1.4sqi.net/img/categories_v2/building/home_",
           "suffix"=>".png"},
         "primary"=>true}],
      "verified"=>false,
      "stats"=>{"checkinsCount"=>0, "usersCount"=>0, "tipCount"=>0},
      "specials"=>{"count"=>0, "items"=>[]},
      "hereNow"=>{"count"=>0, "summary"=>"0 people here", "groups"=>[]},
      "referralId"=>"v-1397697176"}]}}