binary.read返回"意外的EOF"无论我使用什么dBase(.dbf)文件

时间:2015-10-09 21:52:53

标签: go dbase

func main() {
        file, err := os.Open("example.dbf") // For read access.
        if err != nil {
            log.Fatal(err)
        }

        dBaseioReader, err := NewReader(file)
        if err != nil {
            log.Fatal(err)
        }
        return nil
}

type dbHeader struct {
    Version             byte
    LastUpdate          [3]byte
    NumRecords          int32
    NumBytesInHeader    int16
    NumBytesInRecord    int16
    _                   [2]byte //reserved
    IncompatFlag        byte
    EncryptionFlag      byte
    MultiUserProcessing [12]byte
    MDXProductionFlag   byte
    LangDriverId        byte
    _                   [2]byte //reserved
    LangDriverName      [32]byte
    _                   [4]byte //reserved
}


type dbFieldDescriptor struct {
    FieldName         [32]byte
    FieldType         byte
    FieldLen          byte
    FieldDec          byte
    _                 [2]byte
    MDXProductionFlag byte
    _                 [2]byte
    NextAutoIncrement [4]byte
    _                 [4]byte
}


type DBaseReader struct {
    rawInput *bufio.Reader
    Header   *dbHeader
    Fields   []*dbFieldDescriptor

    recordsLeft int
}

func NewReader(input io.Reader) (dbr *DBaseReader, err error) {
    dbr = &DBaseReader{
        rawInput: bufio.NewReaderSize(input, 32*1024),
        Header:   &dbHeader{},
    }

    err = binary.Read(dbr.rawInput, binary.LittleEndian, dbr.Header)
    if err != nil{
        return
    }

    dbr.recordsLeft = int(dbr.Header.NumRecords)

    headerBytesLeft := dbr.Header.NumBytesInHeader
    headerBytesLeft -= dbHeaderSize

    // read field descriptors until 0x0D termination byte
    var term []byte
    for {
        field := &dbFieldDescriptor{}

        err = binary.Read(dbr.rawInput, binary.LittleEndian, field)
        if err != nil{
            //FIRST CRASH HAPPENS HERE.
            return
        }

        dbr.Fields = append(dbr.Fields, field)
        headerBytesLeft -= dbFieldDescriptorSize

        // check for terminator byte
        term, err = dbr.rawInput.Peek(1)
        if err != nil{
            return
        }

        if term[0] == 0x0D {
            break
        }
    }

    // read the terminator
    _, err = dbr.rawInput.ReadByte()
    if err != nil {
        return
    }
    headerBytesLeft -= 1

    if headerBytesLeft > 0 {
        err = fmt.Errorf("Error: Header Bytes Left: %d.. Read Properties?!..\n", headerBytesLeft)
        return

        // headerLeftOver := make([]byte, headerBytesLeft)
        // err = binary.Read(dbr.rawInput, binary.LittleEndian, headerLeftOver)
        // if err != nil {
        //  return
        // }

        // props := &dbFieldProperties{}
        // err = binary.Read(dbr.rawInput, binary.LittleEndian, props)
        // if err != nil {
        //  return
        // }

        // fmt.Printf("Props: %#v\n", props)
    }

    // read until first record marker
    _, err = dbr.rawInput.ReadBytes(' ')
    if err != nil {
        return
    }
    return dbr, nil
}

以上是相关代码。无论我使用什么样的dbf文件,程序都会崩溃。我不确定为什么我会继续"意外的EOF"错误。在过去的几天里,我一直试图弄清楚这一点,但遗憾的是没有运气。

1 个答案:

答案 0 :(得分:2)

您未提供任何证据证明您的文件格式正确无误。在编写程序之前,您应该已经确认您具有正确的文件格式。

文件的前256个字节是什么?例如,

hex.go

package main

import (
    "encoding/hex"
    "fmt"
    "io/ioutil"
    "os"
    "strconv"
)

func main() {
    if len(os.Args) <= 1 {
        fmt.Fprintln(os.Stderr, "usage: hex filename [bytes]")
        return
    }
    data, err := ioutil.ReadFile(os.Args[1])
    if err != nil {
        fmt.Fprintln(os.Stderr, "filename:", err)
        return
    }
    n := len(data)
    if len(os.Args) > 2 {
        i, err := strconv.Atoi(os.Args[2])
        if err != nil {
            fmt.Fprintln(os.Stderr, "bytes:", err)
            return
        }
        if n > i {
            n = i
        }
    }
    fmt.Print(hex.Dump(data[:n]))
}

输出:

$ go run hex.go example.dbf 256
00000000  03 01 04 18 01 00 00 00  41 07 d0 05 00 00 00 00  |........A.......|
00000010  00 00 00 00 00 00 00 00  00 00 00 00 00 03 00 00  |................|
00000020  54 52 41 43 4b 5f 49 44  00 00 00 43 01 00 00 00  |TRACK_ID...C....|
00000030  0b 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
00000040  4c 4d 55 4c 54 00 00 00  00 00 00 4c 0c 00 00 00  |LMULT......L....|
00000050  01 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
00000060  4e 54 41 58 59 45 41 52  00 00 00 4e 0d 00 00 00  |NTAXYEAR...N....|
00000070  04 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
00000080  4e 43 4f 55 4e 54 59 43  4f 44 00 4e 11 00 00 00  |NCOUNTYCOD.N....|
00000090  02 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
000000a0  43 50 52 4f 50 41 44 44  00 00 00 43 13 00 00 00  |CPROPADD...C....|
000000b0  3c 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |<...............|
000000c0  4c 43 4f 4d 4d 49 4e 44  00 00 00 4c 4f 00 00 00  |LCOMMIND...LO...|
000000d0  01 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
000000e0  4c 56 41 43 4c 41 4e 44  00 00 00 4c 50 00 00 00  |LVACLAND...LP...|
000000f0  01 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
$
  

The table level

     

但桌子的等级是多少?级别表示其版本。该   dBASE表格式是随着时间的推移而演变的标准。当一个   dBASE的新版本对该格式进行了一些改进,一种新的   给出了格式级别编号,与新的dBASE版本相同。对于   例如,我们有对应于dBASE III的3级,4级,5级和7级,   dBASE IV,dBASE 5和Visual dBASE 7.没有级别6因为   没有Visual dBASE 6。

     

7级带来了许多改进。字段名称最多可以包含31个   字符(最多10个字符之前)。一些新的字段类型有   出现(例如,AutoIncrement字段,几乎   不可能给同一个表中的两个记录提供相同的数字)。   如果您的表必须由其他软件使用,您可能必须这样做   为了兼容性而牺牲这些优点,为数不多   应用程序可以使用7级表。

.dbf文件格式:

文件头字节0,位0-2表示版本号:dBASE级别5为3,dBASE级别7为4。

Level 5 DOS Headers

Xbase Data file (*.dbf)

dBASE Version 7 Table File