我正在学习Golang,所以我可以重写一些shell脚本。
我的网址如下:
https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value
我想提取以下部分:
https://example-1.example.com/a/c482dfad3573acff324c/list.txt
在shell脚本中我会做这样的事情:
echo "$myString" | grep -o 'http://.*.txt'
只有使用标准库,在Golang中做同样事情的最佳方法是什么?
答案 0 :(得分:8)
有几个选择:
// match regexp as in question
pat := regexp.MustCompile(`https?://.*\.txt`)
s := pat.FindString(myString)
// everything before the query
s := strings.Split(myString, "?")[0] string
// same as previous, but avoids []string allocation
s := myString
if i := strings.IndexByte(s, '?'); i >= 0 {
s = s[:i]
}
// parse and clear query string
u, err := url.Parse(myString)
u.RawQuery = ""
s := u.String()
最后一个选项是最好的,因为它将处理所有可能的极端情况。
答案 1 :(得分:2)
您可以使用strings.IndexRune
,strings.IndexByte
,strings.Split
,strings.SplitAfter
,strings.FieldsFunc
,url.Parse
,regexp
或您的功能。
第一个最简单的方法:
您可以使用i := strings.IndexRune(s, '?')
或i := strings.IndexByte(s, '?')
然后s[:i]
这样(带注释输出):
package main
import "fmt"
import "strings"
func main() {
s := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
i := strings.IndexByte(s, '?')
if i != -1 {
fmt.Println(s[:i]) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
}
}
或者你可以使用url.Parse(s)
(我会用它):
package main
import "fmt"
import "net/url"
func main() {
s := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
url, err := url.Parse(s)
if err == nil {
url.RawQuery = ""
fmt.Println(url.String()) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
}
}
或者您可以使用regexp.MustCompile(".*\\.txt")
:
package main
import "fmt"
import "regexp"
var rgx = regexp.MustCompile(`.*\.txt`)
func main() {
s := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
fmt.Println(rgx.FindString(s)) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
}
或者您可以使用splits := strings.FieldsFunc(s, func(r rune) bool { return r == '?' })
然后使用splits[0]
:
package main
import "fmt"
import "strings"
func main() {
s := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
splits := strings.FieldsFunc(s, func(r rune) bool { return r == '?' })
fmt.Println(splits[0]) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
}
您可以使用splits := strings.Split(s, "?")
然后使用splits[0]
:
package main
import "fmt"
import "strings"
func main() {
s := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
splits := strings.Split(s, "?")
fmt.Println(splits[0]) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
}
您可以使用splits := strings.SplitAfter(s, ".txt")
然后使用splits[0]
:
package main
import "fmt"
import "strings"
func main() {
s := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
splits := strings.SplitAfter(s, ".txt")
fmt.Println(splits[0]) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
}
或者您可以使用您的功能(最独立的方式):
package main
import "fmt"
func left(s string) string {
for i, r := range s {
if r == '?' {
return s[:i]
}
}
return ""
}
func main() {
s := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
fmt.Println(left(s)) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
}
答案 2 :(得分:2)
如果您只处理URL,可以使用Go的net/url
库https://golang.org/pkg/net/url/来解析URL,截断查询和片段部分(查询将是parm1=value,parm2=value
等),以及提取剩余部分scheme://host/path
,如下例(https://play.golang.org/p/Ao0jU22NyA):
package main
import (
"fmt"
"net/url"
)
func main() {
u, _ := url.Parse("https://example-1.example.com/a/b/c/list.txt?parm1=value,parm2=https%3A%2F%2Fexample.com%2Fa%3Fparm1%3Dvalue%2Cparm2%3Dvalue#somefragment")
u.RawQuery, u.Fragment = "", ""
fmt.Printf("%s\n", u)
}
输出:
https://example-1.example.com/a/b/c/list.txt
答案 3 :(得分:0)
我用regexp包从string中提取字符串。
在此示例中,我想在和<\ PERSON>之间进行提取,通过re表达式进行此操作,然后通过re1表达式替换和<\ PERSON>。
for循环用于是否存在多个match和re1格式用于替换。
package main
import (
"fmt"
"regexp"
)
func main() {
re := regexp.MustCompile(`<PERSON>(.*?)</PERSON>`)
string_l := "java -mx500m -cp stanford-ner.jar edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier classifiers/english.all.3class.distsim.crf.ser.gz -textFile PatrickYe.txt -outputFormat inlineXML 2> /dev/null I complained to <ORGANIZATION>Microsoft</ORGANIZATION> about <PERSON>Bill Gates</PERSON>.They told me to see the mayor of <PERSON>New York</PERSON>.,"
x := re.FindAllString(string_l, -1)
fmt.Println(x)
for v,st:= range x{
re1 := regexp.MustCompile(`<(.?)PERSON>`)
y1 := re1.ReplaceAllLiteralString(st,"")
fmt.Println(v,st," : sdf : ",y1)
}
}