2023-07-11 02:42:05 +02:00
|
|
|
package bundle
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"context"
|
|
|
|
"io"
|
|
|
|
"io/fs"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
"strings"
|
2023-10-11 11:18:32 +02:00
|
|
|
"sync"
|
2023-07-11 02:42:05 +02:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"golang.org/x/net/html"
|
|
|
|
|
|
|
|
"forge.cadoles.com/arcad/edge/pkg/bundle/zim"
|
2023-10-11 11:18:32 +02:00
|
|
|
lru "github.com/hashicorp/golang-lru/v2"
|
2023-07-11 02:42:05 +02:00
|
|
|
"github.com/pkg/errors"
|
|
|
|
"gitlab.com/wpetit/goweb/logger"
|
|
|
|
"gopkg.in/yaml.v2"
|
|
|
|
)
|
|
|
|
|
|
|
|
type ZimBundle struct {
|
|
|
|
archivePath string
|
2023-10-11 11:18:32 +02:00
|
|
|
|
|
|
|
initOnce sync.Once
|
|
|
|
initErr error
|
|
|
|
|
|
|
|
reader *zim.Reader
|
|
|
|
urlNamespaceCache *lru.Cache[string, zim.Namespace]
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (b *ZimBundle) File(filename string) (io.ReadCloser, os.FileInfo, error) {
|
|
|
|
ctx := logger.With(
|
|
|
|
context.Background(),
|
|
|
|
logger.F("filename", filename),
|
|
|
|
)
|
|
|
|
|
|
|
|
logger.Debug(ctx, "opening file")
|
|
|
|
|
|
|
|
switch filename {
|
|
|
|
case "manifest.yml":
|
|
|
|
return b.renderFakeManifest(ctx)
|
|
|
|
case "server/main.js":
|
|
|
|
return b.renderFakeServerMain(ctx)
|
|
|
|
case "public":
|
|
|
|
return b.renderDirectory(ctx, filename)
|
|
|
|
case "public/index.html":
|
2023-10-11 11:18:32 +02:00
|
|
|
return b.renderMainPage(ctx, filename)
|
2023-07-11 02:42:05 +02:00
|
|
|
|
|
|
|
default:
|
|
|
|
return b.renderURL(ctx, filename)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *ZimBundle) Dir(dirname string) ([]os.FileInfo, error) {
|
|
|
|
files := make([]os.FileInfo, 0)
|
|
|
|
return files, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *ZimBundle) renderFakeManifest(ctx context.Context) (io.ReadCloser, os.FileInfo, error) {
|
2023-10-11 11:18:32 +02:00
|
|
|
if err := b.init(); err != nil {
|
2023-07-11 02:42:05 +02:00
|
|
|
return nil, nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
metadata, err := b.reader.Metadata()
|
2023-07-11 02:42:05 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
manifest := map[string]any{}
|
|
|
|
|
|
|
|
manifest["version"] = "0.0.0"
|
|
|
|
|
|
|
|
if name, exists := metadata[zim.MetadataName]; exists {
|
|
|
|
replacer := strings.NewReplacer(
|
|
|
|
"_", "",
|
|
|
|
" ", "",
|
|
|
|
)
|
|
|
|
|
|
|
|
manifest["id"] = strings.ToLower(replacer.Replace(name)) + ".zim.edge.app"
|
|
|
|
} else {
|
2023-10-11 11:18:32 +02:00
|
|
|
manifest["id"] = b.reader.UUID() + ".zim.edge.app"
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if title, exists := metadata[zim.MetadataTitle]; exists {
|
|
|
|
manifest["title"] = title
|
|
|
|
} else {
|
|
|
|
manifest["title"] = "Unknown"
|
|
|
|
}
|
|
|
|
|
|
|
|
if description, exists := metadata[zim.MetadataDescription]; exists {
|
|
|
|
manifest["description"] = description
|
|
|
|
}
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
favicon, err := b.reader.Favicon()
|
2023-07-11 02:42:05 +02:00
|
|
|
if err != nil && !errors.Is(err, zim.ErrNotFound) {
|
|
|
|
return nil, nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if favicon != nil {
|
|
|
|
manifestMeta, exists := manifest["metadata"].(map[string]any)
|
|
|
|
if !exists {
|
|
|
|
manifestMeta = make(map[string]any)
|
|
|
|
manifest["metadata"] = manifestMeta
|
|
|
|
}
|
|
|
|
|
|
|
|
paths, exists := manifestMeta["paths"].(map[string]any)
|
|
|
|
if !exists {
|
|
|
|
paths = make(map[string]any)
|
|
|
|
manifestMeta["paths"] = paths
|
|
|
|
}
|
|
|
|
|
|
|
|
paths["icon"] = "/" + favicon.FullURL()
|
|
|
|
}
|
|
|
|
|
|
|
|
data, err := yaml.Marshal(manifest)
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
stat := &zimFileInfo{
|
|
|
|
isDir: false,
|
|
|
|
modTime: time.Time{},
|
|
|
|
mode: 0,
|
|
|
|
name: "manifest.yml",
|
|
|
|
size: int64(len(data)),
|
|
|
|
}
|
|
|
|
|
|
|
|
buf := bytes.NewBuffer(data)
|
2023-10-11 11:18:32 +02:00
|
|
|
file := io.NopCloser(buf)
|
2023-07-11 02:42:05 +02:00
|
|
|
|
|
|
|
return file, stat, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *ZimBundle) renderFakeServerMain(ctx context.Context) (io.ReadCloser, os.FileInfo, error) {
|
|
|
|
stat := &zimFileInfo{
|
|
|
|
isDir: false,
|
|
|
|
modTime: time.Time{},
|
|
|
|
mode: 0,
|
|
|
|
name: "server/main.js",
|
|
|
|
size: 0,
|
|
|
|
}
|
|
|
|
|
|
|
|
buf := bytes.NewBuffer(nil)
|
2023-10-11 11:18:32 +02:00
|
|
|
file := io.NopCloser(buf)
|
2023-07-11 02:42:05 +02:00
|
|
|
|
|
|
|
return file, stat, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *ZimBundle) renderURL(ctx context.Context, url string) (io.ReadCloser, os.FileInfo, error) {
|
2023-10-11 11:18:32 +02:00
|
|
|
if err := b.init(); err != nil {
|
2023-07-11 02:42:05 +02:00
|
|
|
return nil, nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
filename := filepath.Base(url)
|
|
|
|
url = strings.TrimPrefix(url, "public/")
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
entry, err := b.searchEntryFromURL(ctx, url)
|
2023-07-11 02:42:05 +02:00
|
|
|
if err != nil {
|
|
|
|
if errors.Is(err, zim.ErrNotFound) {
|
2023-10-11 11:18:32 +02:00
|
|
|
return nil, nil, os.ErrNotExist
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil, nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
logger.Debug(
|
|
|
|
ctx, "found zim entry",
|
|
|
|
logger.F("webURL", url),
|
|
|
|
logger.F("zimFullURL", entry.FullURL()),
|
|
|
|
)
|
2023-07-11 02:42:05 +02:00
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
content, err := entry.Redirect()
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, errors.WithStack(err)
|
|
|
|
}
|
2023-07-11 02:42:05 +02:00
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
contentReader, err := content.Reader()
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, errors.WithStack(err)
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
size, err := contentReader.Size()
|
2023-07-11 02:42:05 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
mimeType := content.MimeType()
|
|
|
|
if mimeType != "text/html" {
|
|
|
|
zimFile := &zimFile{
|
|
|
|
fileInfo: &zimFileInfo{
|
|
|
|
isDir: false,
|
|
|
|
modTime: time.Time{},
|
|
|
|
mode: 0,
|
|
|
|
name: filename,
|
|
|
|
size: size,
|
|
|
|
},
|
|
|
|
reader: contentReader,
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
2023-10-11 11:18:32 +02:00
|
|
|
|
|
|
|
return zimFile, zimFile.fileInfo, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read HTML file and inject Edge scripts
|
|
|
|
|
|
|
|
data, err := io.ReadAll(contentReader)
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
injected, err := b.injectEdgeScriptTag(data)
|
|
|
|
if err != nil {
|
|
|
|
logger.Error(ctx, "could not inject edge script", logger.E(errors.WithStack(err)))
|
|
|
|
} else {
|
|
|
|
data = injected
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
zimFile := &zimFile{
|
|
|
|
fileInfo: &zimFileInfo{
|
|
|
|
isDir: false,
|
|
|
|
modTime: time.Time{},
|
|
|
|
mode: 0,
|
|
|
|
name: filename,
|
2023-10-11 11:18:32 +02:00
|
|
|
size: size,
|
2023-07-11 02:42:05 +02:00
|
|
|
},
|
2023-10-11 11:18:32 +02:00
|
|
|
reader: io.NopCloser(bytes.NewBuffer(data)),
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return zimFile, zimFile.fileInfo, nil
|
|
|
|
}
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
func (b *ZimBundle) searchEntryFromURL(ctx context.Context, url string) (zim.Entry, error) {
|
|
|
|
ctx = logger.With(ctx, logger.F("webURL", url))
|
|
|
|
|
|
|
|
logger.Debug(ctx, "searching entry namespace in local cache")
|
|
|
|
|
|
|
|
// Search URL namespace from cache
|
|
|
|
if namespace, found := b.urlNamespaceCache.Get(url); found {
|
|
|
|
logger.Debug(ctx, "found entry namespace in cache")
|
|
|
|
|
|
|
|
entry, err := b.reader.EntryWithURL(namespace, url)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return entry, nil
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
// Try to access entry directly if the URL match the pattern <NS>/<URL>
|
|
|
|
urlParts := strings.SplitN(url, "/", 2)
|
|
|
|
if len(urlParts) == 2 && len(urlParts[0]) == 1 {
|
|
|
|
namespace := zim.Namespace(urlParts[0])
|
|
|
|
url = urlParts[1]
|
|
|
|
|
|
|
|
logger.Debug(
|
|
|
|
ctx, "trying to access entry directly",
|
|
|
|
logger.F("zimNamespace", namespace),
|
|
|
|
logger.F("zimURL", url),
|
|
|
|
)
|
|
|
|
|
|
|
|
entry, err := b.reader.EntryWithURL(namespace, url)
|
|
|
|
if err != nil && !errors.Is(err, zim.ErrNotFound) {
|
|
|
|
return nil, errors.WithStack(err)
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
if entry != nil {
|
|
|
|
b.urlNamespaceCache.Add(url, entry.Namespace())
|
|
|
|
return entry, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
contentNamespaces := []zim.Namespace{
|
|
|
|
zim.V6NamespaceContent,
|
|
|
|
zim.V6NamespaceMetadata,
|
|
|
|
zim.V5NamespaceLayout,
|
|
|
|
zim.V5NamespaceArticle,
|
|
|
|
zim.V5NamespaceImageFile,
|
|
|
|
zim.V5NamespaceMetadata,
|
|
|
|
}
|
|
|
|
|
|
|
|
logger.Debug(
|
|
|
|
ctx, "make educated guesses about potential url namespace",
|
|
|
|
logger.F("zimNamespaces", contentNamespaces),
|
|
|
|
)
|
|
|
|
|
|
|
|
for _, ns := range contentNamespaces {
|
|
|
|
logger.Debug(
|
|
|
|
ctx, "trying to access entry directly",
|
|
|
|
logger.F("zimNamespace", ns),
|
|
|
|
logger.F("zimURL", url),
|
|
|
|
)
|
|
|
|
|
|
|
|
entry, err := b.reader.EntryWithURL(ns, url)
|
|
|
|
if err != nil && !errors.Is(err, zim.ErrNotFound) {
|
|
|
|
return nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if entry != nil {
|
|
|
|
b.urlNamespaceCache.Add(url, entry.Namespace())
|
|
|
|
return entry, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
logger.Debug(ctx, "doing full entries scan")
|
|
|
|
|
|
|
|
var entry zim.Entry
|
|
|
|
|
|
|
|
iterator := b.reader.Entries()
|
|
|
|
for iterator.Next() {
|
|
|
|
current := iterator.Entry()
|
|
|
|
|
|
|
|
if current.FullURL() != url && current.URL() != url {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
entry = current
|
|
|
|
b.urlNamespaceCache.Add(url, entry.Namespace())
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if err := iterator.Err(); err != nil {
|
|
|
|
return nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if entry == nil {
|
|
|
|
return nil, errors.WithStack(zim.ErrNotFound)
|
|
|
|
}
|
|
|
|
|
|
|
|
return entry, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *ZimBundle) renderDirectory(ctx context.Context, filename string) (io.ReadCloser, os.FileInfo, error) {
|
2023-07-11 02:42:05 +02:00
|
|
|
zimFile := &zimFile{
|
|
|
|
fileInfo: &zimFileInfo{
|
|
|
|
isDir: true,
|
|
|
|
modTime: time.Time{},
|
|
|
|
mode: 0,
|
|
|
|
name: filename,
|
|
|
|
size: 0,
|
|
|
|
},
|
2023-10-11 11:18:32 +02:00
|
|
|
reader: io.NopCloser(bytes.NewBuffer(nil)),
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return zimFile, zimFile.fileInfo, nil
|
|
|
|
}
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
func (b *ZimBundle) renderMainPage(ctx context.Context, filename string) (io.ReadCloser, os.FileInfo, error) {
|
|
|
|
if err := b.init(); err != nil {
|
2023-07-11 02:42:05 +02:00
|
|
|
return nil, nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
main, err := b.reader.MainPage()
|
2023-07-11 02:42:05 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
return b.renderURL(ctx, main.FullURL())
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (b *ZimBundle) injectEdgeScriptTag(data []byte) ([]byte, error) {
|
|
|
|
buff := bytes.NewBuffer(data)
|
|
|
|
doc, err := html.Parse(buff)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
var f func(*html.Node) bool
|
|
|
|
f = func(n *html.Node) bool {
|
|
|
|
if n.Type == html.ElementNode && n.Data == "head" {
|
|
|
|
script := &html.Node{
|
|
|
|
Type: html.ElementNode,
|
|
|
|
Data: "script",
|
|
|
|
Attr: []html.Attribute{
|
|
|
|
{
|
|
|
|
Key: "src",
|
|
|
|
Val: "/edge/sdk/client.js",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
n.AppendChild(script)
|
|
|
|
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
|
|
if keepWalking := f(c); !keepWalking {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
f(doc)
|
|
|
|
|
|
|
|
buff.Reset()
|
|
|
|
|
|
|
|
if err := html.Render(buff, doc); err != nil {
|
|
|
|
return nil, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return buff.Bytes(), nil
|
|
|
|
}
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
func (b *ZimBundle) init() error {
|
|
|
|
b.initOnce.Do(func() {
|
|
|
|
reader, err := zim.Open(b.archivePath)
|
|
|
|
if err != nil {
|
|
|
|
b.initErr = errors.Wrapf(err, "could not open '%v'", b.archivePath)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
b.reader = reader
|
|
|
|
|
|
|
|
cache, err := lru.New[string, zim.Namespace](128)
|
|
|
|
if err != nil {
|
|
|
|
b.initErr = errors.Wrap(err, "could not initialize cache")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
b.urlNamespaceCache = cache
|
|
|
|
})
|
|
|
|
if b.initErr != nil {
|
|
|
|
return errors.WithStack(b.initErr)
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
2023-10-11 11:18:32 +02:00
|
|
|
return nil
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func NewZimBundle(archivePath string) *ZimBundle {
|
|
|
|
return &ZimBundle{
|
|
|
|
archivePath: archivePath,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
type zimFile struct {
|
|
|
|
fileInfo *zimFileInfo
|
2023-10-11 11:18:32 +02:00
|
|
|
reader io.ReadCloser
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Close implements fs.File.
|
|
|
|
func (f *zimFile) Close() error {
|
2023-10-11 11:18:32 +02:00
|
|
|
if err := f.reader.Close(); err != nil {
|
|
|
|
return errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
2023-07-11 02:42:05 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read implements fs.File.
|
|
|
|
func (f *zimFile) Read(d []byte) (int, error) {
|
2023-10-11 11:18:32 +02:00
|
|
|
n, err := f.reader.Read(d)
|
|
|
|
if err != nil {
|
|
|
|
if errors.Is(err, io.EOF) {
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return n, errors.WithStack(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return n, nil
|
2023-07-11 02:42:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Stat implements fs.File.
|
|
|
|
func (f *zimFile) Stat() (fs.FileInfo, error) {
|
|
|
|
return f.fileInfo, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compile-time check that *zimFile satisfies fs.File.
var _ fs.File = (*zimFile)(nil)
|
|
|
|
|
|
|
|
// zimFileInfo is a plain value holder implementing fs.FileInfo: each accessor
// returns the field of the same name.
type zimFileInfo struct {
	isDir bool

	modTime time.Time

	mode fs.FileMode

	name string

	size int64
}

// IsDir implements fs.FileInfo.
func (info *zimFileInfo) IsDir() bool { return info.isDir }

// ModTime implements fs.FileInfo.
func (info *zimFileInfo) ModTime() time.Time { return info.modTime }

// Mode implements fs.FileInfo.
func (info *zimFileInfo) Mode() fs.FileMode { return info.mode }

// Name implements fs.FileInfo.
func (info *zimFileInfo) Name() string { return info.name }

// Size implements fs.FileInfo.
func (info *zimFileInfo) Size() int64 { return info.size }

// Sys implements fs.FileInfo. There is no underlying data source to expose,
// so it always returns nil.
func (*zimFileInfo) Sys() any { return nil }

// Compile-time check that *zimFileInfo satisfies fs.FileInfo.
var _ fs.FileInfo = (*zimFileInfo)(nil)
|