249 lines
4.3 KiB
Go
249 lines
4.3 KiB
Go
package main
|
||
|
||
import (
|
||
"bufio"
|
||
"errors"
|
||
"fmt"
|
||
"io"
|
||
"os"
|
||
"strconv"
|
||
"strings"
|
||
)
|
||
|
||
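// Approach: split each large file into smaller ones. 10 billion int64 values
// take roughly 36 GB, so every value is reduced modulo 125 and written to one
// of 125 sub-files (an estimated 294 MB each); sub-files that share the same
// suffix are then compared against each other.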
|
||
|
||
// MOD is the modulus that bounds the sub-file suffixes iterated when the
// bucket files of the two inputs are compared.
const MOD int = 125
|
||
|
||
// CompareFile holds the paths of the two input files that are compared.
type CompareFile struct {
	// Files contains exactly two paths: the first and the second input file.
	Files [2]string
}
|
||
|
||
type Intersection struct {
|
||
Err error
|
||
Mod int
|
||
CompareFile
|
||
}
|
||
|
||
func main() {
|
||
|
||
//按需读取文件按 带bufio 400M缓存 \r\n 读取,打开125文件描述符循环开启,循环关闭
|
||
if len(os.Args) < 3 {
|
||
fmt.Println("请在 命令行后输入两个需要对比 文件路径")
|
||
os.Exit(1)
|
||
}
|
||
|
||
inters := Intersection{
|
||
CompareFile: CompareFile{[2]string{os.Args[1], os.Args[2]}},
|
||
Mod: 125,
|
||
}
|
||
|
||
//验证
|
||
inters.CheckValid()
|
||
|
||
if inters.Err != nil {
|
||
panic(inters.Err)
|
||
}
|
||
|
||
//分割文件生成子文件
|
||
inters.splitFile()
|
||
|
||
//进行对比
|
||
inters.intersection()
|
||
|
||
}
|
||
|
||
func (in Intersection) CheckValid() {
|
||
in.Err = in.CompareFile.CheckValid()
|
||
}
|
||
|
||
func (cf CompareFile) CheckValid() error {
|
||
|
||
for _, filename := range cf.Files {
|
||
|
||
finfo, err := os.Stat(filename)
|
||
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
if finfo.IsDir() {
|
||
return errors.New(filename + "是一个目录")
|
||
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func (in Intersection) splitFile() {
|
||
|
||
for _, srcFile := range in.CompareFile.Files {
|
||
|
||
fileHandlerMap := map[string]*os.File{}
|
||
|
||
//文件读取
|
||
readHandler, error := os.OpenFile(srcFile, os.O_RDONLY, 0)
|
||
|
||
if error != nil {
|
||
panic(error)
|
||
}
|
||
|
||
defer readHandler.Close()
|
||
|
||
fileArr := strings.Split(srcFile, ".")
|
||
|
||
fileStr := fileArr[0]
|
||
//400M buf
|
||
rd := bufio.NewReaderSize(readHandler, 400*1024*1024)
|
||
|
||
for {
|
||
content, readerr := rd.ReadString('\n')
|
||
|
||
content = strings.Trim(content, "\r\n")
|
||
number, err := strconv.ParseInt(content, 10, 64)
|
||
|
||
if err != nil {
|
||
fmt.Println(err.Error())
|
||
continue
|
||
}
|
||
|
||
fileSuffix := strconv.FormatInt(number%125, 10)
|
||
|
||
subFileName := "./" + fileStr + "." + fileSuffix
|
||
|
||
fileHandler, ok := fileHandlerMap[subFileName]
|
||
|
||
//新建句柄
|
||
if !ok {
|
||
fileHandler, err = os.Create(subFileName)
|
||
if err != nil {
|
||
panic(err.Error())
|
||
}
|
||
fileHandlerMap[subFileName] = fileHandler
|
||
}
|
||
|
||
//写入内容
|
||
writeCont := []byte(content + "\n")
|
||
if _, err = fileHandler.Write(writeCont); err != nil {
|
||
fmt.Println(err.Error())
|
||
continue
|
||
}
|
||
|
||
//结束文件读取
|
||
if readerr != nil || readerr == io.EOF {
|
||
break
|
||
}
|
||
|
||
}
|
||
|
||
//关闭子文件句柄
|
||
for _, f := range fileHandlerMap {
|
||
f.Close()
|
||
}
|
||
|
||
}
|
||
|
||
}
|
||
|
||
func (in Intersection) intersection() {
|
||
|
||
intersectionMap := map[int64]bool{}
|
||
|
||
//写入文件句柄
|
||
fileHandler, err := os.Create("./intersection.txt")
|
||
if err != nil {
|
||
panic(err.Error())
|
||
}
|
||
|
||
defer fileHandler.Close()
|
||
|
||
//同时存在
|
||
for i := 0; i <= MOD; i++ {
|
||
|
||
filea := "./" + in.CompareFile.Files[0] + "." + strconv.Itoa(i)
|
||
_, aerr := os.Stat(filea)
|
||
|
||
fileb := "./" + in.CompareFile.Files[1] + "." + strconv.Itoa(i)
|
||
_, berr := os.Stat(fileb)
|
||
|
||
defer func(filea, fileb string) {
|
||
os.Remove(filea)
|
||
os.Remove(fileb)
|
||
}(filea, fileb)
|
||
|
||
if os.IsNotExist(aerr) || os.IsNotExist(berr) {
|
||
continue
|
||
}
|
||
|
||
//a子文件读取
|
||
readHandler, error := os.OpenFile(filea, os.O_RDONLY, 0)
|
||
|
||
if error != nil {
|
||
panic(error)
|
||
}
|
||
defer readHandler.Close()
|
||
|
||
rd := bufio.NewReader(readHandler)
|
||
|
||
intersectionMap = map[int64]bool{} //置空
|
||
|
||
for {
|
||
|
||
content, readerr := rd.ReadString('\n')
|
||
|
||
//结束文件读取
|
||
if readerr != nil || readerr == io.EOF {
|
||
break
|
||
}
|
||
|
||
content = strings.Trim(content, "\r\n")
|
||
|
||
number, _ := strconv.ParseInt(content, 10, 64)
|
||
|
||
_, ok := intersectionMap[number]
|
||
if !ok {
|
||
intersectionMap[number] = false
|
||
}
|
||
|
||
}
|
||
|
||
//b子文件读取
|
||
readHandler, error = os.OpenFile(fileb, os.O_RDONLY, 0)
|
||
rd = bufio.NewReader(readHandler)
|
||
|
||
for {
|
||
|
||
content, readerr := rd.ReadString('\n')
|
||
|
||
//结束文件读取
|
||
if readerr != nil || readerr == io.EOF {
|
||
break
|
||
}
|
||
|
||
content = strings.Trim(content, "\r\n")
|
||
|
||
number, _ := strconv.ParseInt(content, 10, 64)
|
||
|
||
_, ok := intersectionMap[number]
|
||
if ok {
|
||
intersectionMap[number] = true
|
||
}
|
||
|
||
}
|
||
|
||
for key, val := range intersectionMap {
|
||
if val {
|
||
//写入内容
|
||
str := strconv.FormatInt(key, 10) + "\n"
|
||
if _, err = fileHandler.WriteString(str); err != nil {
|
||
fmt.Println(err.Error())
|
||
continue
|
||
}
|
||
}
|
||
}
|
||
|
||
}
|
||
|
||
}
|