edge/pkg/bundle/zim_bundle.go
William Petit a5c67c29d0
Some checks failed
arcad/edge/pipeline/head There was a failure building this commit
feat(bundle): add zim format support
2023-10-19 22:20:52 +02:00

484 lines
9.5 KiB
Go

package bundle
import (
"bytes"
"context"
"io"
"io/fs"
"os"
"path/filepath"
"strings"
"sync"
"time"
"golang.org/x/net/html"
"forge.cadoles.com/arcad/edge/pkg/bundle/zim"
lru "github.com/hashicorp/golang-lru/v2"
"github.com/pkg/errors"
"gitlab.com/wpetit/goweb/logger"
"gopkg.in/yaml.v2"
)
// ZimBundle serves files out of a ZIM archive through its File and Dir
// methods. The archive is opened lazily, on the first call that needs it.
type ZimBundle struct {
// archivePath is the path given to zim.Open during initialization.
archivePath string
// initOnce guards the one-time initialization performed by init.
initOnce sync.Once
// initErr keeps the error produced by initialization, if any, so that
// every later call to init reports it again.
initErr error
// reader is the opened ZIM archive, set by init.
reader *zim.Reader
// urlNamespaceCache records, per requested URL, the namespace of the
// entry that was found for it. It is created by init with 128 slots.
urlNamespaceCache *lru.Cache[string, zim.Namespace]
}
// File opens the virtual file identified by filename.
//
// A few names are handled specially: "manifest.yml" and "server/main.js"
// are synthesized, "public" is rendered as a directory placeholder and
// "public/index.html" maps to the archive main page. Any other name is
// resolved as the URL of a ZIM entry.
func (b *ZimBundle) File(filename string) (io.ReadCloser, os.FileInfo, error) {
	filenameField := logger.F("filename", filename)
	ctx := logger.With(context.Background(), filenameField)

	logger.Debug(ctx, "opening file")

	switch filename {
	case "public/index.html":
		return b.renderMainPage(ctx, filename)

	case "public":
		return b.renderDirectory(ctx, filename)

	case "server/main.js":
		return b.renderFakeServerMain(ctx)

	case "manifest.yml":
		return b.renderFakeManifest(ctx)
	}

	// Everything else is looked up in the archive.
	return b.renderURL(ctx, filename)
}
// Dir lists the content of dirname. This implementation does not enumerate
// anything: it always returns an empty, non-nil slice and a nil error.
func (b *ZimBundle) Dir(dirname string) ([]os.FileInfo, error) {
	return []os.FileInfo{}, nil
}
// renderFakeManifest synthesizes a "manifest.yml" file from the archive
// metadata and returns it as an in-memory file.
//
// The manifest always carries a "version" of "0.0.0", an "id" derived from
// the archive name (or from its UUID when no name is available) and a
// "title" (defaulting to "Unknown"). The description and the favicon path
// are added when the archive provides them.
func (b *ZimBundle) renderFakeManifest(ctx context.Context) (io.ReadCloser, os.FileInfo, error) {
	if err := b.init(); err != nil {
		return nil, nil, errors.WithStack(err)
	}

	metadata, err := b.reader.Metadata()
	if err != nil {
		return nil, nil, errors.WithStack(err)
	}

	// Identifier: lowercased archive name stripped of underscores and
	// spaces, falling back to the archive UUID.
	var id string
	if name, ok := metadata[zim.MetadataName]; ok {
		stripped := strings.NewReplacer("_", "", " ", "").Replace(name)
		id = strings.ToLower(stripped) + ".zim.edge.app"
	} else {
		id = b.reader.UUID() + ".zim.edge.app"
	}

	manifest := map[string]any{
		"version": "0.0.0",
		"id":      id,
		"title":   "Unknown",
	}

	if title, ok := metadata[zim.MetadataTitle]; ok {
		manifest["title"] = title
	}

	if description, ok := metadata[zim.MetadataDescription]; ok {
		manifest["description"] = description
	}

	// A missing favicon is not an error, the icon path is simply omitted.
	favicon, err := b.reader.Favicon()
	if err != nil && !errors.Is(err, zim.ErrNotFound) {
		return nil, nil, errors.WithStack(err)
	}

	if favicon != nil {
		manifest["metadata"] = map[string]any{
			"paths": map[string]any{
				"icon": "/" + favicon.FullURL(),
			},
		}
	}

	data, err := yaml.Marshal(manifest)
	if err != nil {
		return nil, nil, errors.WithStack(err)
	}

	// Remaining fields (isDir, modTime, mode) keep their zero value.
	info := &zimFileInfo{
		name: "manifest.yml",
		size: int64(len(data)),
	}

	return io.NopCloser(bytes.NewBuffer(data)), info, nil
}
// renderFakeServerMain returns an empty in-memory file named
// "server/main.js". It never fails and does not touch the archive.
func (b *ZimBundle) renderFakeServerMain(ctx context.Context) (io.ReadCloser, os.FileInfo, error) {
	// All other fields (isDir, modTime, mode, size) keep their zero value.
	info := &zimFileInfo{name: "server/main.js"}

	empty := io.NopCloser(new(bytes.Buffer))

	return empty, info, nil
}
// renderURL serves the ZIM entry matching url.
//
// A leading "public/" is stripped from url before the lookup. Redirections
// are followed through entry.Redirect. Content whose MIME type is not
// "text/html" is streamed as is from the archive; HTML content is read in
// memory so that the Edge client script tag can be injected in it.
//
// It returns os.ErrNotExist when no entry matches url. The returned file
// info reports the size of the bytes actually served, i.e. the size of the
// injected document for HTML content.
func (b *ZimBundle) renderURL(ctx context.Context, url string) (io.ReadCloser, os.FileInfo, error) {
	if err := b.init(); err != nil {
		return nil, nil, errors.WithStack(err)
	}

	url = strings.TrimPrefix(url, "public/")

	entry, err := b.searchEntryFromURL(ctx, url)
	if err != nil {
		if errors.Is(err, zim.ErrNotFound) {
			return nil, nil, os.ErrNotExist
		}

		return nil, nil, errors.WithStack(err)
	}

	logger.Debug(
		ctx, "found zim entry",
		logger.F("webURL", url),
		logger.F("zimFullURL", entry.FullURL()),
	)

	content, err := entry.Redirect()
	if err != nil {
		return nil, nil, errors.WithStack(err)
	}

	contentReader, err := content.Reader()
	if err != nil {
		return nil, nil, errors.WithStack(err)
	}

	// closeContent releases the archive reader on the paths where it is not
	// handed over to the caller. A close failure is only logged: at that
	// point it must not mask the primary outcome of the call.
	closeContent := func() {
		if closeErr := contentReader.Close(); closeErr != nil {
			logger.Error(ctx, "could not close zim content reader", logger.E(errors.WithStack(closeErr)))
		}
	}

	size, err := contentReader.Size()
	if err != nil {
		closeContent()

		return nil, nil, errors.WithStack(err)
	}

	fileInfo := &zimFileInfo{
		isDir:   false,
		modTime: time.Time{},
		mode:    0,
		name:    filepath.Base(url),
		size:    size,
	}

	if content.MimeType() != "text/html" {
		// The caller becomes responsible for closing the archive reader.
		return &zimFile{fileInfo: fileInfo, reader: contentReader}, fileInfo, nil
	}

	// Read HTML file and inject Edge scripts
	data, err := io.ReadAll(contentReader)

	// The content is served from memory from now on.
	closeContent()

	if err != nil {
		return nil, nil, errors.WithStack(err)
	}

	// Injection is best-effort: on failure the original document is served.
	injected, err := b.injectEdgeScriptTag(data)
	if err != nil {
		logger.Error(ctx, "could not inject edge script", logger.E(errors.WithStack(err)))
	} else {
		data = injected
	}

	// The injected document is larger than the archived one, report the
	// size of what is really going to be read.
	fileInfo.size = int64(len(data))

	file := &zimFile{
		fileInfo: fileInfo,
		reader:   io.NopCloser(bytes.NewBuffer(data)),
	}

	return file, fileInfo, nil
}
// searchEntryFromURL resolves url to a ZIM entry, trying the cheapest
// strategies first:
//
//  1. url is used as a full URL;
//  2. the namespace remembered for url in urlNamespaceCache is tried;
//  3. url is tried within each of the known content namespaces;
//  4. every entry of the archive is scanned.
//
// Steps 3 and 4 record the namespace of the entry they find so that later
// lookups of the same url stop at step 2.
//
// It returns an error matching zim.ErrNotFound when no entry is found.
func (b *ZimBundle) searchEntryFromURL(ctx context.Context, url string) (zim.Entry, error) {
	ctx = logger.With(ctx, logger.F("webURL", url))

	entry, err := b.reader.EntryWithFullURL(url)
	if err != nil && !errors.Is(err, zim.ErrNotFound) {
		return nil, errors.WithStack(err)
	}

	if entry != nil {
		return entry, nil
	}

	logger.Debug(ctx, "searching entry namespace in local cache")

	if ns, cached := b.urlNamespaceCache.Get(url); cached {
		entry, err := b.reader.EntryWithURL(ns, url)
		if err != nil && !errors.Is(err, zim.ErrNotFound) {
			return nil, errors.WithStack(err)
		}

		if entry != nil {
			return entry, nil
		}

		// The cached namespace did not lead to an entry: fall through to
		// the slower strategies, which will refresh the cache.
	}

	contentNamespaces := []zim.Namespace{
		zim.V6NamespaceContent,
		zim.V6NamespaceMetadata,
		zim.V5NamespaceLayout,
		zim.V5NamespaceArticle,
		zim.V5NamespaceImageFile,
		zim.V5NamespaceMetadata,
	}

	logger.Debug(
		ctx, "make educated guesses about potential url namespace",
		logger.F("zimNamespaces", contentNamespaces),
	)

	for _, ns := range contentNamespaces {
		logger.Debug(
			ctx, "trying to access entry directly",
			logger.F("zimNamespace", ns),
			logger.F("zimURL", url),
		)

		entry, err := b.reader.EntryWithURL(ns, url)
		if err != nil && !errors.Is(err, zim.ErrNotFound) {
			return nil, errors.WithStack(err)
		}

		if entry != nil {
			b.urlNamespaceCache.Add(url, entry.Namespace())

			return entry, nil
		}
	}

	logger.Debug(ctx, "doing full entries scan")

	iterator := b.reader.Entries()
	for iterator.Next() {
		current := iterator.Entry()

		if current.FullURL() != url && current.URL() != url {
			continue
		}

		entry = current
		b.urlNamespaceCache.Add(url, entry.Namespace())

		break
	}

	if err := iterator.Err(); err != nil {
		return nil, errors.WithStack(err)
	}

	if entry == nil {
		return nil, errors.WithStack(zim.ErrNotFound)
	}

	return entry, nil
}
// renderDirectory returns an empty placeholder file flagged as a directory
// and named after filename. It never fails and does not touch the archive.
func (b *ZimBundle) renderDirectory(ctx context.Context, filename string) (io.ReadCloser, os.FileInfo, error) {
	// modTime, mode and size keep their zero value.
	info := &zimFileInfo{
		isDir: true,
		name:  filename,
	}

	dir := &zimFile{
		fileInfo: info,
		reader:   io.NopCloser(new(bytes.Buffer)),
	}

	return dir, info, nil
}
// renderMainPage serves the main page of the archive by resolving it
// through the reader and delegating to renderURL with its full URL.
// The filename argument is not used for the lookup.
func (b *ZimBundle) renderMainPage(ctx context.Context, filename string) (io.ReadCloser, os.FileInfo, error) {
	if err := b.init(); err != nil {
		return nil, nil, errors.WithStack(err)
	}

	mainPage, err := b.reader.MainPage()
	if err != nil {
		return nil, nil, errors.WithStack(err)
	}

	mainURL := mainPage.FullURL()

	return b.renderURL(ctx, mainURL)
}
// injectEdgeScriptTag parses data as an HTML document, appends a
// <script src="/edge/sdk/client.js"> element to the first <head> element
// found and returns the re-rendered document.
//
// data is only read: the rendered document is written to a separate
// buffer, so the caller can keep using data if the injection fails.
func (b *ZimBundle) injectEdgeScriptTag(data []byte) ([]byte, error) {
	// A bytes.Reader is used rather than bytes.NewBuffer(data): the latter
	// takes ownership of data and rendering into it after a Reset would
	// overwrite the caller's slice in place.
	doc, err := html.Parse(bytes.NewReader(data))
	if err != nil {
		return nil, errors.WithStack(err)
	}

	// injectIntoHead walks the tree depth-first and reports whether the
	// walk should go on, i.e. false as soon as the script has been added.
	var injectIntoHead func(*html.Node) bool

	injectIntoHead = func(n *html.Node) bool {
		if n.Type == html.ElementNode && n.Data == "head" {
			script := &html.Node{
				Type: html.ElementNode,
				Data: "script",
				Attr: []html.Attribute{
					{
						Key: "src",
						Val: "/edge/sdk/client.js",
					},
				},
			}

			n.AppendChild(script)

			return false
		}

		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if keepWalking := injectIntoHead(c); !keepWalking {
				return false
			}
		}

		return true
	}

	injectIntoHead(doc)

	var rendered bytes.Buffer

	if err := html.Render(&rendered, doc); err != nil {
		return nil, errors.WithStack(err)
	}

	return rendered.Bytes(), nil
}
// init lazily opens the archive and creates the URL namespace cache.
//
// The setup runs at most once; its outcome is kept in initErr and every
// call reports that same outcome.
func (b *ZimBundle) init() error {
	setup := func() error {
		reader, err := zim.Open(b.archivePath)
		if err != nil {
			return errors.Wrapf(err, "could not open '%v'", b.archivePath)
		}

		b.reader = reader

		cache, err := lru.New[string, zim.Namespace](128)
		if err != nil {
			return errors.Wrap(err, "could not initialize cache")
		}

		b.urlNamespaceCache = cache

		return nil
	}

	b.initOnce.Do(func() {
		b.initErr = setup()
	})

	if b.initErr == nil {
		return nil
	}

	return errors.WithStack(b.initErr)
}
// NewZimBundle returns a ZimBundle bound to the archive located at
// archivePath. The archive is not opened here but on first use.
func NewZimBundle(archivePath string) *ZimBundle {
	bundle := new(ZimBundle)
	bundle.archivePath = archivePath

	return bundle
}
// zimFile is an fs.File made of a precomputed file info and of the reader
// providing its content.
type zimFile struct {
	fileInfo *zimFileInfo
	reader   io.ReadCloser
}

// Compile-time check that *zimFile satisfies fs.File.
var _ fs.File = (*zimFile)(nil)

// Close implements fs.File.
//
// It closes the underlying reader; a failure is returned with a stack trace
// attached (errors.WithStack returns nil for a nil error).
func (f *zimFile) Close() error {
	return errors.WithStack(f.reader.Close())
}

// Read implements fs.File.
//
// io.EOF is handed back untouched so that callers relying on it keep
// working; any other error gets a stack trace attached.
func (f *zimFile) Read(d []byte) (int, error) {
	n, err := f.reader.Read(d)

	switch {
	case err == nil:
		return n, nil

	case errors.Is(err, io.EOF):
		return n, err

	default:
		return n, errors.WithStack(err)
	}
}

// Stat implements fs.File.
//
// It returns the file info given at construction and never fails.
func (f *zimFile) Stat() (fs.FileInfo, error) {
	return f.fileInfo, nil
}
// zimFileInfo is a plain value implementation of fs.FileInfo: each
// accessor returns the corresponding field as is.
type zimFileInfo struct {
	isDir   bool
	modTime time.Time
	mode    fs.FileMode
	name    string
	size    int64
}

// Compile-time check that *zimFileInfo satisfies fs.FileInfo.
var _ fs.FileInfo = (*zimFileInfo)(nil)

// Name implements fs.FileInfo.
func (fi *zimFileInfo) Name() string { return fi.name }

// Size implements fs.FileInfo.
func (fi *zimFileInfo) Size() int64 { return fi.size }

// Mode implements fs.FileInfo.
func (fi *zimFileInfo) Mode() fs.FileMode { return fi.mode }

// ModTime implements fs.FileInfo.
func (fi *zimFileInfo) ModTime() time.Time { return fi.modTime }

// IsDir implements fs.FileInfo.
func (fi *zimFileInfo) IsDir() bool { return fi.isDir }

// Sys implements fs.FileInfo. There is no underlying data source to
// expose, so it always returns nil.
func (*zimFileInfo) Sys() any { return nil }