practice/intersection/main.go

249 lines
4.3 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"strconv"
"strings"
)
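// Approach: split each large file into smaller files. 10 billion int64 values take about 36G of memory; each int64 is taken modulo 125 and written to one of 125 sub-files (each estimated at about 294M), then sub-files with the same suffix are compared with each other.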
// MOD is the bucket modulus of the splitting scheme: numbers are grouped by
// their remainder, and intersection derives the sub-file suffixes it visits
// from this value.
const MOD int = 125
// CompareFile holds the paths of the two input files that are compared
// against each other.
type CompareFile struct {
// Files contains the two input paths; main fills it from os.Args[1] and
// os.Args[2].
Files [2]string
}
// Intersection bundles the two input files with the settings and state used
// while computing the numbers they have in common.
type Intersection struct {
// Err is meant to carry the validation error; main inspects it after
// calling CheckValid.
Err error
// Mod is the bucket modulus configured by the caller (main sets it to 125).
Mod int
// CompareFile is embedded, so its Files field and methods are promoted.
CompareFile
}
// main expects two file paths on the command line, validates them, splits
// each file into bucket sub-files and finally writes the numbers present in
// both files to the intersection output.
//
// Design note carried over from the original comment: the inputs are read on
// demand, line by line, through a 400M bufio buffer, and up to 125 sub-file
// descriptors are opened while splitting and closed again afterwards.
func main() {
	if len(os.Args) < 3 {
		fmt.Println("请在 命令行后输入两个需要对比 文件路径")
		os.Exit(1)
	}
	inters := Intersection{
		CompareFile: CompareFile{Files: [2]string{os.Args[1], os.Args[2]}},
		Mod:         MOD,
	}
	// Validate through the embedded CompareFile and store the result directly.
	// Calling inters.CheckValid() and then reading inters.Err is not reliable
	// when that method has a value receiver, because it would only update a
	// copy and the error would be lost.
	if inters.Err = inters.CompareFile.CheckValid(); inters.Err != nil {
		fmt.Fprintln(os.Stderr, inters.Err)
		os.Exit(1)
	}
	// Split both inputs into bucket sub-files.
	inters.splitFile()
	// Compare matching buckets and emit the common numbers.
	inters.intersection()
}
// CheckValid validates the embedded CompareFile and records the outcome in
// in.Err (nil when both inputs are acceptable).
//
// The receiver is a pointer so that the assignment is visible to the caller;
// with a value receiver the error would be written to a copy and discarded.
func (in *Intersection) CheckValid() {
	in.Err = in.CompareFile.CheckValid()
}
// CheckValid inspects every path in cf.Files with os.Stat. It returns the
// Stat error for the first path that cannot be inspected, an error naming the
// first path that is a directory, and nil when neither applies to any path.
func (cf CompareFile) CheckValid() error {
	for i := 0; i < len(cf.Files); i++ {
		path := cf.Files[i]
		info, statErr := os.Stat(path)
		switch {
		case statErr != nil:
			return statErr
		case info.IsDir():
			return errors.New(path + "是一个目录")
		}
	}
	return nil
}
// bucketCount returns the number of buckets used to partition the inputs:
// in.Mod when it is positive, otherwise the package default MOD. The guard
// prevents a division by zero when Mod was left unset.
func (in Intersection) bucketCount() int {
	if in.Mod > 0 {
		return in.Mod
	}
	return MOD
}

// bucketFileName returns the path of the sub-file that holds bucket number
// bucket of srcFile. splitFile and intersection both use it, so the names
// they write and read always agree. The sub-file is placed next to srcFile,
// which works for relative and absolute paths alike.
func bucketFileName(srcFile string, bucket int) string {
	return srcFile + "." + strconv.Itoa(bucket)
}

// forEachInt64Line reads rd line by line and calls fn with the parsed value
// and the trimmed text of every line that is a base-10 int64.
//
// Blank lines are skipped silently, lines that fail to parse are reported on
// stdout and skipped. A final line without a trailing newline is still
// processed. It returns nil at end of input and the read error otherwise.
func forEachInt64Line(rd *bufio.Reader, fn func(number int64, text string)) error {
	for {
		line, readErr := rd.ReadString('\n')
		if text := strings.TrimSpace(line); text != "" {
			number, err := strconv.ParseInt(text, 10, 64)
			if err != nil {
				fmt.Println(err.Error())
			} else {
				fn(number, text)
			}
		}
		// The read status is checked after the line is handled and on every
		// path, so the loop always terminates at end of input.
		if readErr == io.EOF {
			return nil
		}
		if readErr != nil {
			return readErr
		}
	}
}

// splitFile partitions each of the two input files into bucket sub-files:
// every number n goes to the sub-file whose suffix is the non-negative
// remainder of n divided by the bucket count. Equal numbers therefore always
// land in sub-files with the same suffix.
//
// It panics when an input cannot be opened or read, or when a sub-file cannot
// be created.
func (in Intersection) splitFile() {
	mod := in.bucketCount()
	for _, srcFile := range in.CompareFile.Files {
		splitSourceFile(srcFile, mod)
	}
}

// splitSourceFile writes the numbers of srcFile into up to mod bucket
// sub-files named by bucketFileName. Sub-files are created lazily, written
// through buffered writers, and flushed and closed before returning.
func splitSourceFile(srcFile string, mod int) {
	// 400M read buffer, as in the original design.
	const readBufSize = 400 * 1024 * 1024

	src, err := os.Open(srcFile)
	if err != nil {
		panic(err)
	}
	defer src.Close()

	files := make([]*os.File, mod)
	writers := make([]*bufio.Writer, mod)
	defer func() {
		for i, f := range files {
			if f == nil {
				continue
			}
			if err := writers[i].Flush(); err != nil {
				fmt.Println(err.Error())
			}
			if err := f.Close(); err != nil {
				fmt.Println(err.Error())
			}
		}
	}()

	rd := bufio.NewReaderSize(src, readBufSize)
	err = forEachInt64Line(rd, func(number int64, text string) {
		// Go's % keeps the sign of the dividend; shift negative remainders
		// into [0, mod) so every number maps to a bucket that is compared.
		bucket := int(number % int64(mod))
		if bucket < 0 {
			bucket += mod
		}
		w := writers[bucket]
		if w == nil {
			f, err := os.Create(bucketFileName(srcFile, bucket))
			if err != nil {
				panic(err.Error())
			}
			files[bucket] = f
			w = bufio.NewWriter(f)
			writers[bucket] = w
		}
		if _, err := w.WriteString(text + "\n"); err != nil {
			fmt.Println(err.Error())
		}
	})
	if err != nil {
		panic(err)
	}
}

// intersection compares the bucket sub-files of the two inputs pairwise and
// writes every number that occurs in both, once and one per line, to
// ./intersection.txt. The sub-files are removed as soon as their bucket has
// been processed.
//
// It panics when the output file cannot be created or a sub-file cannot be
// opened or read.
func (in Intersection) intersection() {
	out, err := os.Create("./intersection.txt")
	if err != nil {
		panic(err.Error())
	}
	defer out.Close()

	w := bufio.NewWriter(out)
	// Deferred calls run last-in first-out, so the flush happens before the
	// file is closed.
	defer func() {
		if err := w.Flush(); err != nil {
			fmt.Println(err.Error())
		}
	}()

	mod := in.bucketCount()
	// Buckets are numbered 0..mod-1.
	for i := 0; i < mod; i++ {
		fileA := bucketFileName(in.CompareFile.Files[0], i)
		fileB := bucketFileName(in.CompareFile.Files[1], i)
		intersectBucket(fileA, fileB, w)
	}
}

// intersectBucket writes to w every number contained in both fileA and fileB,
// each at most once, and removes both sub-files before returning. When either
// sub-file does not exist the bucket has no common numbers and nothing is
// written.
func intersectBucket(fileA, fileB string, w *bufio.Writer) {
	defer func() {
		os.Remove(fileA)
		os.Remove(fileB)
	}()

	_, aErr := os.Stat(fileA)
	_, bErr := os.Stat(fileB)
	if os.IsNotExist(aErr) || os.IsNotExist(bErr) {
		return
	}

	// seen maps each number of fileA to whether it has already been emitted.
	seen := map[int64]bool{}
	readBucketFile(fileA, func(number int64, _ string) {
		if _, ok := seen[number]; !ok {
			seen[number] = false
		}
	})
	readBucketFile(fileB, func(number int64, _ string) {
		emitted, ok := seen[number]
		if !ok || emitted {
			return
		}
		seen[number] = true
		if _, err := w.WriteString(strconv.FormatInt(number, 10) + "\n"); err != nil {
			fmt.Println(err.Error())
		}
	})
}

// readBucketFile opens path, feeds every number in it to fn and closes the
// file again. It panics when the file cannot be opened or read.
func readBucketFile(path string, fn func(number int64, text string)) {
	f, err := os.Open(path)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	if err := forEachInt64Line(bufio.NewReader(f), fn); err != nil {
		panic(err)
	}
}