使用Python,我可以这样做:
equals = filecmp.cmp(file_old, file_new)
在go语言中是否有任何内置函数可以做到这一点?我用Google搜索但没有成功。
我可以使用hash/crc32包中的一些哈希函数,但这比上面的Python代码要麻烦得多。
答案 0 :(得分:8)
答案 1 :(得分:8)
To complete the @captncraig answer, if you want to know if the two files are the same, you can use the SameFile(fi1, fi2 FileInfo) method from the OS package.
SameFile reports whether fi1 and fi2 describe the same file. For example, on Unix this means that the device and inode fields of the two underlying structures are identical;
Otherwise, if you want to check the files contents, here is a solution which checks the two files line by line avoiding the load of the entire files in memory.
First try: https://play.golang.org/p/NlQZRrW1dT
EDIT: Read in byte chunks and fail fast if the files do not have the same size. https://play.golang.org/p/YyYWuCRJXV
// chunkSize is the number of bytes read from each file per comparison step.
const chunkSize = 64000

// deepCompare reports whether the files at paths file1 and file2 have
// byte-for-byte identical contents. The files are streamed in chunkSize
// pieces, so neither file is loaded into memory as a whole.
//
// Any failure to open or read a file terminates the program via log.Fatal.
//
// A size pre-check (os.Stat on both paths) can be added in front of the loop
// as a fast path; the result does not depend on it.
func deepCompare(file1, file2 string) bool {
	f1, err := os.Open(file1)
	if err != nil {
		log.Fatal(err)
	}
	defer f1.Close()

	f2, err := os.Open(file2)
	if err != nil {
		log.Fatal(err)
	}
	defer f2.Close()

	// Allocate the buffers once and reuse them for every chunk.
	b1 := make([]byte, chunkSize)
	b2 := make([]byte, chunkSize)

	for {
		// io.ReadFull keeps reading until the buffer is full or the file
		// ends, so n1 and n2 only differ when the files differ in length.
		n1, err1 := io.ReadFull(f1, b1)
		if err1 != nil && err1 != io.EOF && err1 != io.ErrUnexpectedEOF {
			log.Fatal(err1)
		}
		n2, err2 := io.ReadFull(f2, b2)
		if err2 != nil && err2 != io.EOF && err2 != io.ErrUnexpectedEOF {
			log.Fatal(err2)
		}

		// Compare only the bytes actually read; the rest of each buffer is
		// stale or zero and must not influence the result.
		if n1 != n2 || !bytes.Equal(b1[:n1], b2[:n2]) {
			return false
		}

		// A short (or empty) chunk means both files ended at the same offset.
		if n1 < chunkSize {
			return true
		}
	}
}
答案 2 :(得分:4)
如何使用bytes.Equal
?
package main
import (
"fmt"
"io/ioutil"
"log"
"bytes"
)
// main loads lines1.txt and lines2.txt completely into memory and prints
// whether their contents are byte-for-byte identical. A failure to read
// either file aborts the program via log.Fatal.
func main() {
	// Trivial example only: per the comments on this answer, it is better not
	// to read an entire file into memory.
	first, errFirst := ioutil.ReadFile("lines1.txt")
	if errFirst != nil {
		log.Fatal(errFirst)
	}

	second, errSecond := ioutil.ReadFile("lines2.txt")
	if errSecond != nil {
		log.Fatal(errSecond)
	}

	// Per comment, bytes.Equal is significantly more performant.
	identical := bytes.Equal(first, second)
	fmt.Println(identical)
}
答案 3 :(得分:1)
您可以使用equalfile
这样的软件包。主要API:
func CompareFile(path1, path2 string) (bool, error)
Godoc:https://godoc.org/github.com/udhos/equalfile
示例:
package main
import (
"fmt"
"os"
"github.com/udhos/equalfile"
)
// main compares the two files named on the command line using
// equalfile.CompareFile and reports the outcome through both a message on
// standard output and the process exit status:
//
//	0 - files match
//	1 - files differ
//	2 - wrong number of arguments
//	3 - the comparison itself failed
func main() {
	args := os.Args
	if len(args) != 3 {
		fmt.Printf("usage: equal file1 file2\n")
		os.Exit(2)
	}

	same, err := equalfile.CompareFile(args[1], args[2])
	switch {
	case err != nil:
		fmt.Printf("equal: error: %v\n", err)
		os.Exit(3)
	case same:
		fmt.Println("equal: files match")
		os.Exit(0)
	default:
		fmt.Println("equal: files differ")
		os.Exit(1)
	}
}
答案 4 :(得分:1)
在检查现有答案后,我创建了一个简单的包,用于比较任意(有限)io.Reader
和文件作为一种便捷方法:https://github.com/hlubek/readercomp
示例:
package main
import (
"fmt"
"log"
"os"
"github.com/hlubek/readercomp"
)
// main compares the two files named by the first two command-line arguments
// using readercomp.FilesEqual and prints the boolean result.
//
// Missing arguments or a failed comparison terminate the program via
// log.Fatal.
func main() {
	// Guard the os.Args indexing below; without it a missing argument causes
	// an index-out-of-range panic instead of a readable message.
	if len(os.Args) < 3 {
		log.Fatal("expected two file paths as arguments")
	}

	result, err := readercomp.FilesEqual(os.Args[1], os.Args[2])
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(result)
}
答案 5 :(得分:0)
这是我写的一个io.Reader
。你可以调用_, err := io.Copy(ioutil.Discard, newCompareReader(a, b))
,如果两个流的内容不相同,就会返回错误。通过限制不必要的数据复制,此实现针对性能进行了优化。
package main
import (
"bytes"
"errors"
"fmt"
"io"
)
// compareReader is an io.Reader that passes through the bytes of stream a
// while checking that stream b yields exactly the same bytes. Read fails with
// an error as soon as the two streams are found to differ in content or
// length.
type compareReader struct {
	a    io.Reader
	b    io.Reader
	bBuf []byte // scratch space for B's bytes, compared against the chunk just read from A
}

// newCompareReader returns a reader over a that simultaneously verifies b has
// identical contents. Draining it (for example with io.Copy into io.Discard)
// returns a nil error only when both streams are equal.
func newCompareReader(a, b io.Reader) io.Reader {
	return &compareReader{
		a: a,
		b: b,
	}
}

// Read reads the next chunk of A into p, reads the same number of bytes from
// B and compares the two. It returns the number of bytes of A placed in p.
//
// The returned error is A's own error (including io.EOF once both streams
// ended together), or a descriptive error when the streams differ, when one
// stream is longer than the other, or when reading B fails.
func (c *compareReader) Read(p []byte) (int, error) {
	if len(p) == 0 {
		// Nothing can be read; also avoids sizing bBuf to zero length.
		return 0, nil
	}
	if len(c.bBuf) < len(p) {
		// Grow the scratch buffer so it can always mirror a full read into p.
		c.bBuf = make([]byte, len(p))
	}

	readA, errA := c.a.Read(p)
	if readA > 0 {
		// bBuf is guaranteed to have at least readA bytes of space.
		if _, errB := io.ReadFull(c.b, c.bBuf[:readA]); errB != nil {
			// io.ReadFull reports io.EOF when B had no bytes left at all and
			// io.ErrUnexpectedEOF when B ended part-way; both mean B is
			// shorter than A.
			if errB == io.EOF || errB == io.ErrUnexpectedEOF {
				return readA, errors.New("compareReader: A had more data than B")
			}
			return readA, fmt.Errorf("compareReader: read error from B: %w", errB)
		}
		if !bytes.Equal(p[:readA], c.bBuf[:readA]) {
			return readA, errors.New("compareReader: bytes not equal")
		}
	}

	if errA == io.EOF {
		// A is exhausted, so B must be exhausted too. Probe for one more byte;
		// io.ReadFull retries reads that return (0, nil), so the outcome is
		// always either a byte, io.EOF, or a genuine error.
		_, errB := io.ReadFull(c.b, c.bBuf[:1])
		if errB == nil {
			return readA, errors.New("compareReader: B had more data than A")
		}
		if errB != io.EOF {
			return readA, fmt.Errorf("compareReader: got EOF from A but not from B: %w", errB)
		}
	}

	return readA, errA
}
答案 6 :(得分:0)
标准方法是对两个文件分别调用stat(os.Stat),然后使用os.SameFile。
-https://groups.google.com/g/golang-nuts/c/G-5D6agvz2Q/m/2jV_6j6LBgAJ
os.SameFile
应该与Python的filecmp.cmp(f1, f2)
大致相同(即shallow=true
,这意味着它仅比较stat获取的文件信息)。
func SameFile(fi1, fi2 FileInfo) bool
SameFile报告fi1和fi2是否描述相同的文件。例如,在Unix上,这意味着两个基础结构的device和inode字段是相同的。在其他系统上,该决定可能基于路径名。 SameFile仅适用于此程序包的Stat返回的结果。在其他情况下返回false。
但是,如果您实际上想比较文件的内容,则必须自己做。
答案 7 :(得分:0)
类似下面这样的代码应该可以解决问题。我看了github.com/udhos/equalfile
,对我来说它似乎有点小题大做。在调用下面的compare()之前,应先进行两次os.Stat()
调用并比较文件大小,以便在大小不同时尽早走快速路径。
import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
)
// errNotSame is returned by compare when both files were read successfully
// but their contents differ.
var errNotSame = errors.New("File contents are different")

// compare checks whether the files at paths p1 and p2 have identical
// contents, streaming both in 8192-byte chunks.
//
// It returns nil when the contents are the same, errNotSame when they differ
// (in bytes or in length), and any other error when a file cannot be opened
// or read. Read failures are wrapped, so use errors.Is / errors.As to inspect
// the underlying cause.
func compare(p1, p2 string) error {
	fh1, err := os.Open(p1)
	if err != nil {
		return err
	}
	defer fh1.Close()

	fh2, err := os.Open(p2)
	if err != nil {
		return err
	}
	defer fh2.Close()

	var buf1, buf2 [8192]byte
	for {
		// io.ReadFull absorbs short reads, so the byte counts of the two
		// files can only differ when one file ends before the other.
		n1, err1 := io.ReadFull(fh1, buf1[:])
		if err1 != nil && err1 != io.EOF && err1 != io.ErrUnexpectedEOF {
			// Report genuine read failures before any content verdict so
			// they are never mistaken for "files differ".
			return fmt.Errorf("reading %s: %w", p1, err1)
		}
		n2, err2 := io.ReadFull(fh2, buf2[:])
		if err2 != nil && err2 != io.EOF && err2 != io.ErrUnexpectedEOF {
			return fmt.Errorf("reading %s: %w", p2, err2)
		}

		if n1 != n2 || !bytes.Equal(buf1[:n1], buf2[:n2]) {
			return errNotSame
		}

		// A chunk shorter than the buffer means both files ended here.
		if n1 < len(buf1) {
			return nil
		}
	}
}