warc/reader.go
2019-09-26 20:41:28 +07:00

94 lines
1.7 KiB
Go

package warc
import (
"fmt"
"os"
"go.etcd.io/bbolt"
)
// Archive is the storage for archiving the web page.
type Archive struct {
db *bbolt.DB
}
// Open opens the archive from specified path.
func Open(path string) (*Archive, error) {
// Make sure archive exists
info, err := os.Stat(path)
if os.IsNotExist(err) || info.IsDir() {
return nil, fmt.Errorf("archive doesn't exist")
}
// Open database
options := &bbolt.Options{
ReadOnly: true,
}
db, err := bbolt.Open(path, os.ModePerm, options)
if err != nil {
return nil, err
}
return &Archive{db: db}, nil
}
// Close closes the storage.
func (arc *Archive) Close() {
arc.db.Close()
}
// Read fetch the resource with specified name from archive.
func (arc *Archive) Read(name string) ([]byte, string, error) {
// Make sure name exists
if name == "" {
name = "archive-root"
}
var content []byte
var strContentType string
err := arc.db.View(func(tx *bbolt.Tx) error {
bucket := tx.Bucket([]byte(name))
if bucket == nil {
return fmt.Errorf("%s doesn't exist", name)
}
contentType := bucket.Get([]byte("type"))
if contentType == nil {
return fmt.Errorf("%s doesn't exist", name)
}
strContentType = string(contentType)
content = bucket.Get([]byte("content"))
if content == nil {
return fmt.Errorf("%s doesn't exist", name)
}
return nil
})
if err != nil {
return nil, "", err
}
return content, strContentType, nil
}
// HasResource checks if the resource exists in archive.
func (arc *Archive) HasResource(name string) bool {
// Make sure name exists
if name == "" {
name = "archive-root"
}
var exists bool
arc.db.View(func(tx *bbolt.Tx) error {
bucket := tx.Bucket([]byte(name))
exists = bucket != nil
return nil
})
return exists
}