在Swift中从href提取链接

时间:2019-05-09 03:20:45

标签: swift

假设我有这样的html链接:

<a href = "https://mitsui-shopping-park.com/lalaport/koshien/" target="_blank"> https://mitsui-shopping-park.com/lalaport / koshien / </a>

我要提取:

<a href = "THIS LINK" target="_blank"> NOT THIS LINK </a> 

我尝试过:someString.replacingOccurrences(of: "<[^>]+>", with: "", options: .regularExpression, range: nil),但这给了我:

<a href = "NOT THIS LINK" target="_blank"> BUT THIS LINK </a>

请帮助。

3 个答案:

答案 0 :(得分:2)

不需要正则表达式,可以使用属性字符串的link属性。

首先,让我们使用下面这个String扩展(extension):

extension String {
    /// Parses the receiver as an HTML document and returns it as an attributed string.
    ///
    /// - Returns: The attributed string produced by the HTML importer, or an empty
    ///   `NSAttributedString` when the import throws.
    func convert2Html() -> NSAttributedString {
        // The bytes handed to the importer are UTF-8, so say so explicitly; without
        // `.characterEncoding` the importer may assume a different encoding and
        // garble non-ASCII text.
        let options: [NSAttributedString.DocumentReadingOptionKey: Any] = [
            .documentType: NSAttributedString.DocumentType.html,
            .characterEncoding: String.Encoding.utf8.rawValue
        ]

        do {
            return try NSAttributedString(data: Data(utf8),
                                          options: options,
                                          documentAttributes: nil)
        } catch {
            // Callers expect a non-optional value, so a failed import yields an empty string.
            return NSAttributedString()
        }
    }
}

转换此String

// Raw string literal: the embedded double quotes need no backslash escapes.
let html = #"<a href = "https://mitsui-shopping-park.com/lalaport/koshien/" target="_blank"> https://mitsui-shopping-park.com/lalaport / koshien / </a>"#

转换为 NSAttributedString:

// Parse the HTML snippet into an attributed string so its attributes can be queried.
let attrib: NSAttributedString = html.convert2Html()

然后以这种方式提取链接:

// Walk the whole string instead of probing index 0: `attribute(_:at:effectiveRange:)`
// raises a range exception on an empty attributed string, and the link does not
// necessarily start at the first character.
let fullRange = NSRange(location: 0, length: attrib.length)
attrib.enumerateAttribute(.link, in: fullRange, options: []) { value, _, stop in
    // Runs without a link attribute are reported with a nil value; skip them.
    guard let url = value as? URL else { return }
    print(url.absoluteString)  //https://mitsui-shopping-park.com/lalaport/koshien/
    // Only the first link is wanted.
    stop.pointee = true
}

答案 1 :(得分:1)

这是在href="和结束"之间获取值的一种可能的解决方案。这仅适用于字符串中的一个href。

let html = "<a href = \"https://mitsui-shopping-park.com/lalaport/koshien/\" target=\"_blank\"> https://mitsui-shopping-park.com/lalaport / koshien / </a>"

// The pattern matches the whole `href = "…"` attribute. Non-capturing groups still
// consume text, so the matched range includes the `href = "` prefix and the closing
// quote; the URL has to be cut out of the match afterwards.
if let attributeRange = html.range(of: "(?:href\\s*=\\s*\")[^\"]*(?:\")", options: .regularExpression) {
    let attribute = html[attributeRange]
    // Skip everything up to and including the opening quote, then drop the closing quote.
    let href = attribute.drop(while: { $0 != "\"" }).dropFirst().dropLast()
    print(href)
} else {
    print("There is no href")
}

让我们分解一下正则表达式:

首先,让我们去掉正则表达式中仅为了构成有效的Swift字符串字面量而添加的额外\转义。这样就得到:

(?:href\s*=\s*")[^"]*(?:")

这主要包括三个部分:

(?:href\s*=\s*") - the href, optional space, =, optional space, and opening quote
[^"]* - the actual URL - everything that isn't a quote
(?:") - the close quote

(?: )语法是非捕获分组:它只起分组作用、不会生成捕获组,但括号内匹配到的文本仍然属于整个匹配结果。因此range(of:)返回的范围包含href = "前缀和结尾的引号;如果只需要URL本身,还要把这两部分去掉,或者改用捕获组。

答案 2 :(得分:0)

可以使用NSRegularExpression.matches来利用正则表达式的捕获组功能。我总是使用下面这个方便的扩展方法:

extension String {
    /// Returns the groups of the first match of `pattern` in the receiver.
    ///
    /// - Parameter pattern: An `NSRegularExpression` pattern containing at least one
    ///   capture group.
    /// - Returns: Element 0 is the whole match and the following elements are the
    ///   capture groups in order. A group that did not participate in the match, or
    ///   that matched empty text, is `nil`. The array is empty when the pattern is
    ///   invalid, when nothing matches, or when the pattern has no capture groups.
    func capturedGroups(withRegex pattern: String) -> [String?] {
        guard let regex = try? NSRegularExpression(pattern: pattern, options: []) else {
            return []
        }

        // NSRegularExpression measures ranges in UTF-16 code units, which differs from
        // `count` (Characters) as soon as the string contains emoji or other
        // multi-unit characters. Build the range from the string's own indices.
        let searchRange = NSRange(startIndex..<endIndex, in: self)

        guard let match = regex.firstMatch(in: self, options: [], range: searchRange),
              match.numberOfRanges > 1 else {
            return []
        }

        let source = self as NSString
        return (0..<match.numberOfRanges).map { groupIndex -> String? in
            let groupRange = match.range(at: groupIndex)
            // An unmatched group has length 0 (location NSNotFound).
            guard groupRange.length > 0 else { return nil }
            return source.substring(with: groupRange)
        }
    }
}

let html = """
<a href = "https://mitsui-shopping-park.com/lalaport/koshien/" target="_blank"> https://mitsui-shopping-park.com/lalaport / koshien / </a>
"""
// Element 0 is the whole match and element 1 the first capture group. The array is
// empty when nothing matches, so check the count before indexing and unwrap the
// optional so the bare URL is printed rather than `Optional("…")`.
let groups = html.capturedGroups(withRegex: "href\\s*=\\s*\"([^\"]+)\"")
if groups.count > 1, let href = groups[1] {
    print(href)
} else {
    print("There is no href")
}