使用golang从文件中提取数据

时间:2016-03-27 09:23:04

标签: go

我想在满足某个条件时,从文件中提取若干行。

文件中的数据如下所示:

Sat 08 Aug 2015
Norwich City
A
League
    W 3-1
    Zaha 38; Delaney 48; Cabaye 90
    27,036

如果某一行匹配日期模式,我想打印它后面的五行。

我的代码如下:

// main scans test.txt line by line and prints every line that contains a
// date such as "Sat 08 Aug 2015".
func main() {
    // Three letters, two digits, three letters, four digits, each separated
    // by one whitespace character. The character class is [A-Za-z] because
    // the earlier [aA-zZ] also matched the ASCII punctuation that sits
    // between 'Z' and 'a'. MustCompile panics on a bad pattern instead of
    // silently leaving r nil.
    r := regexp.MustCompile(`[A-Za-z]{3}\s[0-9]{2}\s[A-Za-z]{3}\s[0-9]{4}`)

    file, err := os.Open("test.txt")
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()

    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        line := scanner.Text()
        if !r.MatchString(line) {
            continue
        }

        fmt.Println(line)

        // here how do i capture the following 5 lines
    }

    // Scan returns false both at end of input and on failure, so the error
    // must be inspected after the loop; a failure that ends the loop would
    // otherwise go unreported.
    if err := scanner.Err(); err != nil {
        log.Fatal(err)
    }
}

3 个答案:

答案 0 :(得分:2)

不确定我是否遗漏了某些东西,但这样就足够了:

package main

import (
"regexp"
"os"
"log"
"bufio"
"fmt"
)

// main scans /tmp/test.txt and, for every line containing a date such as
// "Sat 08 Aug 2015", prints that line followed by up to five lines after it.
func main() {
    // Three letters, two digits, three letters, four digits, each separated
    // by one whitespace character. [A-Za-z] is used because [aA-zZ] also
    // matches the ASCII punctuation between 'Z' and 'a'. MustCompile panics
    // on a bad pattern instead of silently leaving r nil.
    r := regexp.MustCompile(`[A-Za-z]{3}\s[0-9]{2}\s[A-Za-z]{3}\s[0-9]{4}`)

    file, err := os.Open("/tmp/test.txt")
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()

    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        if !r.MatchString(scanner.Text()) {
            continue
        }
        fmt.Println(scanner.Text())

        // Print the lines that follow the date. Stopping as soon as Scan
        // reports false avoids printing empty lines when the input ends in
        // the middle of a record.
        for i := 0; i < 5 && scanner.Scan(); i++ {
            fmt.Println(scanner.Text())
        }
    }

    // Scan returns false both at end of input and on failure, so the error
    // must be inspected after the loop; a failure that ends the loop would
    // otherwise go unreported.
    if err := scanner.Err(); err != nil {
        log.Fatal(err)
    }
}

答案 1 :(得分:1)

也许可以像下面这样?

package main

import (
    "bufio"
    "fmt"
    "io"
    "os"
    "strings"
    "time"
)

// Match holds one fixture record. ParseMatch fills the fields from the
// record's lines in order: the date first, then one field per following line.
type Match struct {
    // Date is the match date, parsed with the fmtMatchDate layout.
    Date       time.Time
    // Opponents is the second line of the record (e.g. "Norwich City").
    Opponents  string
    // Venue is the third line of the record ("A" or "H" in the sample input).
    Venue      string
    // Type is the fourth line of the record (e.g. "League").
    Type       string
    // Result is the fifth line of the record (e.g. "W 3-1").
    Result     string
    // Scorers is the sixth line of the record; empty when the record has none.
    Scorers    string
    // Attendance is the seventh line of the record, kept as text
    // (e.g. "27,036"); empty when the record has none.
    Attendance string
}

// fmtMatchDate is the time layout used to parse and format match dates:
// abbreviated weekday, two-digit day, abbreviated month and four-digit year
// (e.g. "Sat 08 Aug 2015").
var fmtMatchDate = "Mon 02 Jan 2006"

// String renders the match as newline-terminated lines: the date in the
// fmtMatchDate layout, then opponents, venue, type and result, followed by
// the scorers and attendance lines when those fields are non-empty.
func (m Match) String() string {
    // The date and the four mandatory fields are always emitted.
    rows := []string{
        m.Date.Format(fmtMatchDate),
        m.Opponents,
        m.Venue,
        m.Type,
        m.Result,
    }
    // Scorers and attendance are emitted only when present.
    for _, extra := range []string{m.Scorers, m.Attendance} {
        if extra != "" {
            rows = append(rows, extra)
        }
    }

    var text string
    for _, row := range rows {
        text += fmt.Sprintln(row)
    }
    return text
}

// ParseMatch builds a Match from the lines of one record. The first line
// must be a date in the fmtMatchDate layout; the following lines, in order,
// populate Opponents, Venue, Type, Result, Scorers and Attendance. Every line
// is trimmed of surrounding whitespace, missing trailing lines leave their
// fields empty, and lines beyond the seventh are ignored.
//
// It returns a zero Match and the time.Parse error when the date line is
// invalid. An empty slice yields a zero Match and a nil error.
//
// TODO: Implement a better parser.
func ParseMatch(lines []string) (Match, error) {
    var parsed Match
    if len(lines) == 0 {
        return parsed, nil
    }

    when, err := time.Parse(fmtMatchDate, strings.TrimSpace(lines[0]))
    if err != nil {
        return Match{}, err
    }
    parsed.Date = when

    // Destination for each line after the date, in record order.
    targets := []*string{
        &parsed.Opponents,
        &parsed.Venue,
        &parsed.Type,
        &parsed.Result,
        &parsed.Scorers,
        &parsed.Attendance,
    }
    for idx, raw := range lines[1:] {
        if idx >= len(targets) {
            break
        }
        *targets[idx] = strings.TrimSpace(raw)
    }
    return parsed, nil
}

func main() {
    f, err := os.Open("match.txt")
    if err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
    var lines []string
    snr := bufio.NewScanner(f)
    for snr.Scan() {
        line := snr.Text()
        if _, err = time.Parse(fmtMatchDate, strings.TrimSpace(line)); err == nil {
            if len(lines) > 0 {
                m, err := ParseMatch(lines)
                if err != nil {
                    fmt.Fprintln(os.Stderr, err)
                } else {
                    fmt.Print(m)
                }
            }
            lines = lines[:0]
        }
        lines = append(lines, line)
    }
    if len(lines) > 0 {
        m, err := ParseMatch(lines)
        if err != nil {
            fmt.Fprintln(os.Stderr, err)
        } else {
            fmt.Print(m)
        }
    }
    if err := snr.Err(); err != nil {
        if err != io.EOF {
            fmt.Fprintln(os.Stderr, err)
            os.Exit(1)
        }
    }
}

输入:

$ cat match.txt
Sat 08 Aug 2015
Norwich City
A
League
    W 3-1
    Zaha 38; Delaney 48; Cabaye 90
    27,036
Sun 16 Aug 2015
Arsenal
H
League
    L 1-2
Sat 29 Aug 2015
Chelsea
A
League
    W 2-1
    Sako 64; Ward 80
    41,581

输出:

$ go run match.go
Sat 08 Aug 2015
Norwich City
A
League
W 3-1
Zaha 38; Delaney 48; Cabaye 90
27,036
Sun 16 Aug 2015
Arsenal
H
League
L 1-2
Sat 29 Aug 2015
Chelsea
A
League
W 2-1
Sako 64; Ward 80
41,581
$

答案 2 :(得分:-1)

我不太喜欢正则表达式,因为当你或其他人在6个月后再回头看它时,它往往会让事情变得复杂。我会把整个文件读入并拆分成若干行,然后用行偏移量来定位和检测每一行。

// main reads the file named by fileName, splits it into lines and walks them
// looking for a date line of the form "<weekday> <day> <month> <year>", where
// day and year are integers. The six lines after such a line are taken as the
// record's fields.
//
// This is a sketch: the extracted values are not processed any further.
// NOTE(review): a record with fewer than six lines after its date will absorb
// the start of the next record; confirm the input always has full records.
func main() {
    var (
        dayName    string
        month      string
        name       string
        A          string
        league     string
        score      string
        scorers    string
        attendance string
        day        int
        year       int
        err        error
    )
    data, errRead := ioutil.ReadFile(fileName)
    if errRead != nil {
        // NOTE(review): the read error is dropped silently; report it once a
        // logging facility is available to this snippet.
        return
    }

    //  get the files as a block of text
    theText := string(data)
    //  make the line endings consistent
    theText = strings.Replace(theText, "\r\n", "\r", -1)
    theText = strings.Replace(theText, "\n", "\r", -1)
    //  split it into a set of lines
    lines := strings.Split(theText, "\r")
    numLines := len(lines)
    i := 0
    for i < numLines {
        //      at this point we should have your test line
        theLine := lines[i]
        i++ // i now indexes the line after the candidate date line

        // Fields splits on any run of whitespace (spaces or tabs) and
        // ignores leading and trailing blanks, so irregular spacing cannot
        // produce empty parts.
        parts := strings.Fields(theLine)
        if len(parts) != 4 {
            continue
        }
        //      at least the line has the four date parts
        day, err = strconv.Atoi(parts[1])
        if err != nil {
            continue
        }
        year, err = strconv.Atoi(parts[3])
        if err != nil {
            continue
        }
        // A truncated final record has fewer than six lines left; stop
        // instead of indexing past the end of the slice.
        if i+6 > numLines {
            break
        }

        // Assign with '=' so the outer variables are set, not shadowed.
        dayName = parts[0]
        month = parts[2]
        //      the next six lines are your data
        name = lines[i]
        A = lines[i+1]
        league = lines[i+2]
        score = lines[i+3]
        scorers = lines[i+4]
        attendance = lines[i+5]
        i += 6

        // Placeholder so the sketch compiles: replace with real handling
        // of the extracted record.
        _, _, _, _ = dayName, day, month, year
        _, _, _, _, _, _ = name, A, league, score, scorers, attendance
    }
}

至于比分等字段,你需要自己进一步解析,不过这相当简单。另外要记住,从别人那里获取的数据,格式未必总像你期望的那样一致。