我是 Go 语言的新手……我正在寻找新方法来优化和/或修复下面这个算法,它用于计算给定字符串中各个句子里单词数的最大值。句子以 '?'、'!' 或 '.' 结尾,函数应返回一个 >= 0 的 int。
// MaxWordsInSentences returns the largest number of words found in any single
// sentence of S.
//
// A sentence ends at '.', '?' or '!'. Text after the last terminator is treated
// as a final sentence of its own. Words are maximal runs of non-whitespace
// characters, so tabs and newlines separate words just like spaces do. The
// result is 0 when S contains no words.
func MaxWordsInSentences(S string) (result int) {
	for len(S) > 0 {
		// Cut the next sentence off the front of S. All terminators are
		// single-byte ASCII, so end+1 is the start of the remaining text.
		sentence := S
		if end := strings.IndexAny(S, ".?!"); end >= 0 {
			sentence, S = S[:end], S[end+1:]
		} else {
			S = ""
		}
		// Fields splits on any run of white space and never yields empty
		// strings, so its length is the word count of the sentence.
		if n := len(strings.Fields(sentence)); n > result {
			result = n
		}
	}
	return result
}
示例
句子=> "One two three four five six seven eight. One two? One two three four five six seven eight nine? One two three! One two three four."
应返回9作为结果
答案 0 :(得分:2)
在您提供的简单测试用例上,您的算法似乎可以正常工作。您的算法在真实文本上效果不佳。
考虑我的简单算法:
// maxSentenceWords reports the highest word count of any sentence in s.
//
// Sentences are terminated by '.', '?' or '!'; text following the last
// terminator counts as a sentence too. A word is a maximal run of runes that
// are neither terminators nor white space.
func maxSentenceWords(s string) int {
	var (
		best    int  // largest per-sentence count seen so far
		current int  // words counted in the sentence being scanned
		inside  bool // true while the scan is in the middle of a word
	)
	for _, ch := range s {
		if ch == '.' || ch == '?' || ch == '!' {
			if current > best {
				best = current
			}
			current, inside = 0, false
			continue
		}
		if unicode.IsSpace(ch) {
			inside = false
			continue
		}
		if !inside {
			inside = true
			current++
		}
	}
	// The last sentence may lack a terminator.
	if current > best {
		best = current
	}
	return best
}
游乐场:https://play.golang.org/p/OD8jNW1hyAa
它通过了您的简单测试。短文本基准(Lorem Ipsum)和长文本基准(莎士比亚全集)都运行得很快
$ go test words_test.go -run=PeterSO -v -bench=PeterSO -benchmem -timeout=5m
=== RUN TestPeterSO
--- PASS: TestPeterSO (0.00s)
BenchmarkPeterSOL-4 300000 4027 ns/op 0 B/op 0 allocs/op
BenchmarkPeterSOS-4 20 54084832 ns/op 0 B/op 0 allocs/op
$
考虑您的复杂算法:
// MaxWordsInSentences returns the largest number of space-separated words found
// in any of the pieces obtained by splitting S on the sentence pattern.
// The named result starts at 0 and is returned by the bare return at the end.
func MaxWordsInSentences(S string) (result int) {
// The pattern is compiled on every call and the compile error is discarded.
// Inside a character class '|' is a literal, so this class matches
// '.', '|', '?' and '!'.
r, _ := regexp.Compile("[.||?||!]")
count := strings.Count(S, ".") + strings.Count(S, "!") + strings.Count(S, "?") // total number of sentence terminators
for i := 0; i < count; i++ {
// Split runs again over the whole input in every iteration, so S is split
// count times in total. With a limit of count it returns at most count
// pieces, the last one being the unsplit remainder of S.
sentence := r.Split(S, count)[i]
// Only the space character separates words here; tabs and newlines stay
// inside a word.
splitSentence := strings.Split(sentence, " ")
// Keep the non-empty pieces only: consecutive spaces produce empty strings.
var R []string
for _, str := range splitSentence {
if str != "" {
R = append(R, str)
}
}
if len(R) > result {
result = len(R)
}
}
return
}
游乐场:https://play.golang.org/p/MCj-XxEid73
它通过了您的简单测试。短期基准(Lorem Ipsum)运行缓慢,而较长基准(Shakespeare)运行很长时间(5分钟后被杀死)。
$ go test words_test.go -run=Ljubon -v -bench=Ljubon -benchmem -timeout=5m
=== RUN TestLjubon
--- PASS: TestLjubon (0.00s)
BenchmarkLjubonL-4 20000 78623 ns/op 6984 B/op 62 allocs/op
*** Test killed with quit: ran too long (6m0s).
$
测试文件 words_test.go:
package main
import (
"fmt"
"io/ioutil"
"regexp"
"strings"
"testing"
"unicode"
)
// Fixed input texts shared by the tests, the benchmarks and main.
var (
	// sentences is the sample text from the question; the tests expect a
	// maximum of nine words per sentence for it.
	sentences = "One two three four five six seven eight. One two? One two three four five six seven eight nine? One two three! One two three four."

	// loremipsum is a short multi-line text used by the quick benchmarks.
	loremipsum = `
Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
Excepteur sint occaecat cupidatat non proident,
sunt in culpa qui officia deserunt mollit anim id est laborum.
`
)
// shakespeare holds the text used by the long-running benchmarks. It is read
// from disk once, during package initialization.
var shakespeare = mustReadShakespeare()

// mustReadShakespeare returns the contents of the local copy of
// "The Complete Works of William Shakespeare" by William Shakespeare
// (http://www.gutenberg.org/files/100/100-0.txt) and panics if the file
// cannot be read.
func mustReadShakespeare() string {
	data, err := ioutil.ReadFile(`/home/peter/shakespeare.100-0.txt`)
	if err == nil {
		return string(data)
	}
	panic(err)
}
// maxSentenceWords returns the maximum number of words in any sentence of s.
//
// '.', '?' and '!' end a sentence, and whatever follows the last terminator is
// counted as a sentence as well. Words are separated by white space as defined
// by unicode.IsSpace.
func maxSentenceWords(s string) int {
	longest, words := 0, 0
	// betweenWords is true until the first rune of the next word is seen.
	betweenWords := true
	for _, c := range s {
		switch {
		case c == '.' || c == '?' || c == '!':
			if words > longest {
				longest = words
			}
			words = 0
			betweenWords = true
		case unicode.IsSpace(c):
			betweenWords = true
		case betweenWords:
			// First rune of a new word.
			betweenWords = false
			words++
		}
	}
	// Cover a final sentence that has no terminator.
	if words > longest {
		longest = words
	}
	return longest
}
// TestPeterSO checks that maxSentenceWords reports nine words for the sample
// text in sentences.
func TestPeterSO(t *testing.T) {
	const want = 9
	if got := maxSentenceWords(sentences); got != want {
		t.Errorf("want %d; got %d", want, got)
	}
}
// BenchmarkPeterSOL measures maxSentenceWords on the short loremipsum text.
// The result of each call is discarded.
func BenchmarkPeterSOL(b *testing.B) {
	for i := 0; i < b.N; i++ {
		maxSentenceWords(loremipsum)
	}
}
// BenchmarkPeterSOS measures maxSentenceWords on the long shakespeare text.
// The result of each call is discarded.
func BenchmarkPeterSOS(b *testing.B) {
	for i := 0; i < b.N; i++ {
		maxSentenceWords(shakespeare)
	}
}
// MaxWordsInSentences returns the largest number of space-separated words found
// in any of the pieces obtained by splitting S on the sentence pattern.
// The named result starts at 0 and is returned by the bare return at the end.
func MaxWordsInSentences(S string) (result int) {
// The pattern is compiled on every call and the compile error is discarded.
// Inside a character class '|' is a literal, so this class matches
// '.', '|', '?' and '!'.
r, _ := regexp.Compile("[.||?||!]")
count := strings.Count(S, ".") + strings.Count(S, "!") + strings.Count(S, "?") // total number of sentence terminators
for i := 0; i < count; i++ {
// Split runs again over the whole input in every iteration, so S is split
// count times in total. With a limit of count it returns at most count
// pieces, the last one being the unsplit remainder of S.
sentence := r.Split(S, count)[i]
// Only the space character separates words here; tabs and newlines stay
// inside a word.
splitSentence := strings.Split(sentence, " ")
// Keep the non-empty pieces only: consecutive spaces produce empty strings.
var R []string
for _, str := range splitSentence {
if str != "" {
R = append(R, str)
}
}
if len(R) > result {
result = len(R)
}
}
return
}
// TestLjubon checks that MaxWordsInSentences reports nine words for the sample
// text in sentences.
func TestLjubon(t *testing.T) {
	const want = 9
	if got := MaxWordsInSentences(sentences); got != want {
		t.Errorf("want %d; got %d", want, got)
	}
}
// BenchmarkLjubonL measures MaxWordsInSentences on the short loremipsum text.
// The result of each call is discarded.
func BenchmarkLjubonL(b *testing.B) {
	for i := 0; i < b.N; i++ {
		MaxWordsInSentences(loremipsum)
	}
}
// BenchmarkLjubonS measures MaxWordsInSentences on the long shakespeare text.
// The result of each call is discarded.
func BenchmarkLjubonS(b *testing.B) {
	for i := 0; i < b.N; i++ {
		MaxWordsInSentences(shakespeare)
	}
}
func main() {
s := "One two three four five six seven eight. One two? One two three four five six seven eight nine? One two three! One two three four."
max := maxSentenceWords(s) // 9
fmt.Println(max)
s = "One two three! One two three four"
max = maxSentenceWords(s) // 4
fmt.Println(max)
s = loremipsum
max = maxSentenceWords(s)
fmt.Println(max)
}
我称之为“工具法则”,它可以表述如下:给一个小男孩一把锤子,他会发现自己遇到的每样东西都需要敲打。
——亚伯拉罕·卡普兰(Abraham Kaplan),《探究的行为:行为科学方法论》(The Conduct of Inquiry: Methodology for Behavioral Science),1964年,第28页。
Go 的 regexp 包是您用来敲打所有文本的那把锤子吗?
答案 1 :(得分:-1)
// MaxWordsInSentences returns the largest number of words found in any single
// sentence of s.
//
// A sentence ends at '.', '?' or '!'; text after the last terminator is
// counted as a final sentence. A word is a maximal run of runes that are
// neither terminators nor white space, so repeated or leading spaces do not
// inflate the count. The result is 0 when s contains no words.
func MaxWordsInSentences(s string) (result int) {
	wordCount := 0  // words seen in the current sentence
	inWord := false // true while scanning the inside of a word
	for _, r := range s {
		switch r {
		case '.', '?', '!':
			if wordCount > result {
				result = wordCount
			}
			// Start the next sentence from scratch.
			wordCount = 0
			inWord = false
		default:
			if unicode.IsSpace(r) {
				inWord = false
			} else if !inWord {
				// Count a word once, on its first rune.
				inWord = true
				wordCount++
			}
		}
	}
	// The last sentence may not end with a terminator.
	if wordCount > result {
		result = wordCount
	}
	return result
}