package bundle import ( "bytes" "context" "io" "io/fs" "os" "path/filepath" "strings" "sync" "time" "golang.org/x/net/html" "forge.cadoles.com/arcad/edge/pkg/bundle/zim" lru "github.com/hashicorp/golang-lru/v2" "github.com/pkg/errors" "gitlab.com/wpetit/goweb/logger" "gopkg.in/yaml.v2" ) type ZimBundle struct { archivePath string initOnce sync.Once initErr error reader *zim.Reader urlNamespaceCache *lru.Cache[string, zim.Namespace] } func (b *ZimBundle) File(filename string) (io.ReadCloser, os.FileInfo, error) { ctx := logger.With( context.Background(), logger.F("filename", filename), ) logger.Debug(ctx, "opening file") switch filename { case "manifest.yml": return b.renderFakeManifest(ctx) case "server/main.js": return b.renderFakeServerMain(ctx) case "public": return b.renderDirectory(ctx, filename) case "public/index.html": return b.renderMainPage(ctx, filename) default: return b.renderURL(ctx, filename) } } func (b *ZimBundle) Dir(dirname string) ([]os.FileInfo, error) { files := make([]os.FileInfo, 0) return files, nil } func (b *ZimBundle) renderFakeManifest(ctx context.Context) (io.ReadCloser, os.FileInfo, error) { if err := b.init(); err != nil { return nil, nil, errors.WithStack(err) } metadata, err := b.reader.Metadata() if err != nil { return nil, nil, errors.WithStack(err) } manifest := map[string]any{} manifest["version"] = "0.0.0" if name, exists := metadata[zim.MetadataName]; exists { replacer := strings.NewReplacer( "_", "", " ", "", ) manifest["id"] = strings.ToLower(replacer.Replace(name)) + ".zim.edge.app" } else { manifest["id"] = b.reader.UUID() + ".zim.edge.app" } if title, exists := metadata[zim.MetadataTitle]; exists { manifest["title"] = title } else { manifest["title"] = "Unknown" } if description, exists := metadata[zim.MetadataDescription]; exists { manifest["description"] = description } favicon, err := b.reader.Favicon() if err != nil && !errors.Is(err, zim.ErrNotFound) { return nil, nil, errors.WithStack(err) } if favicon != nil { manifestMeta, exists := manifest["metadata"].(map[string]any) if !exists { manifestMeta = make(map[string]any) manifest["metadata"] = manifestMeta } paths, exists := manifestMeta["paths"].(map[string]any) if !exists { paths = make(map[string]any) manifestMeta["paths"] = paths } paths["icon"] = "/" + favicon.FullURL() } data, err := yaml.Marshal(manifest) if err != nil { return nil, nil, errors.WithStack(err) } stat := &zimFileInfo{ isDir: false, modTime: time.Time{}, mode: 0, name: "manifest.yml", size: int64(len(data)), } buf := bytes.NewBuffer(data) file := io.NopCloser(buf) return file, stat, nil } func (b *ZimBundle) renderFakeServerMain(ctx context.Context) (io.ReadCloser, os.FileInfo, error) { stat := &zimFileInfo{ isDir: false, modTime: time.Time{}, mode: 0, name: "server/main.js", size: 0, } buf := bytes.NewBuffer(nil) file := io.NopCloser(buf) return file, stat, nil } func (b *ZimBundle) renderURL(ctx context.Context, url string) (io.ReadCloser, os.FileInfo, error) { if err := b.init(); err != nil { return nil, nil, errors.WithStack(err) } filename := filepath.Base(url) url = strings.TrimPrefix(url, "public/") entry, err := b.searchEntryFromURL(ctx, url) if err != nil { if errors.Is(err, zim.ErrNotFound) { return nil, nil, os.ErrNotExist } return nil, nil, errors.WithStack(err) } logger.Debug( ctx, "found zim entry", logger.F("webURL", url), logger.F("zimFullURL", entry.FullURL()), ) content, err := entry.Redirect() if err != nil { return nil, nil, errors.WithStack(err) } contentReader, err := content.Reader() if err != nil { return nil, nil, errors.WithStack(err) } size, err := contentReader.Size() if err != nil { return nil, nil, errors.WithStack(err) } mimeType := content.MimeType() if mimeType != "text/html" { zimFile := &zimFile{ fileInfo: &zimFileInfo{ isDir: false, modTime: time.Time{}, mode: 0, name: filename, size: size, }, reader: contentReader, } return zimFile, zimFile.fileInfo, nil } // Read HTML file and inject Edge scripts data, err := io.ReadAll(contentReader) if err != nil { return nil, nil, err } injected, err := b.injectEdgeScriptTag(data) if err != nil { logger.Error(ctx, "could not inject edge script", logger.E(errors.WithStack(err))) } else { data = injected } zimFile := &zimFile{ fileInfo: &zimFileInfo{ isDir: false, modTime: time.Time{}, mode: 0, name: filename, size: size, }, reader: io.NopCloser(bytes.NewBuffer(data)), } return zimFile, zimFile.fileInfo, nil } func (b *ZimBundle) searchEntryFromURL(ctx context.Context, url string) (zim.Entry, error) { ctx = logger.With(ctx, logger.F("webURL", url)) logger.Debug(ctx, "searching entry namespace in local cache") // Search URL namespace from cache if namespace, found := b.urlNamespaceCache.Get(url); found { logger.Debug(ctx, "found entry namespace in cache") entry, err := b.reader.EntryWithURL(namespace, url) if err != nil { return nil, errors.WithStack(err) } return entry, nil } // Try to access entry directly if the URL match the pattern / urlParts := strings.SplitN(url, "/", 2) if len(urlParts) == 2 && len(urlParts[0]) == 1 { namespace := zim.Namespace(urlParts[0]) url = urlParts[1] logger.Debug( ctx, "trying to access entry directly", logger.F("zimNamespace", namespace), logger.F("zimURL", url), ) entry, err := b.reader.EntryWithURL(namespace, url) if err != nil && !errors.Is(err, zim.ErrNotFound) { return nil, errors.WithStack(err) } if entry != nil { b.urlNamespaceCache.Add(url, entry.Namespace()) return entry, nil } } contentNamespaces := []zim.Namespace{ zim.V6NamespaceContent, zim.V6NamespaceMetadata, zim.V5NamespaceLayout, zim.V5NamespaceArticle, zim.V5NamespaceImageFile, zim.V5NamespaceMetadata, } logger.Debug( ctx, "make educated guesses about potential url namespace", logger.F("zimNamespaces", contentNamespaces), ) for _, ns := range contentNamespaces { logger.Debug( ctx, "trying to access entry directly", logger.F("zimNamespace", ns), logger.F("zimURL", url), ) entry, err := b.reader.EntryWithURL(ns, url) if err != nil && !errors.Is(err, zim.ErrNotFound) { return nil, errors.WithStack(err) } if entry != nil { b.urlNamespaceCache.Add(url, entry.Namespace()) return entry, nil } } logger.Debug(ctx, "doing full entries scan") var entry zim.Entry iterator := b.reader.Entries() for iterator.Next() { current := iterator.Entry() if current.FullURL() != url && current.URL() != url { continue } entry = current b.urlNamespaceCache.Add(url, entry.Namespace()) break } if err := iterator.Err(); err != nil { return nil, errors.WithStack(err) } if entry == nil { return nil, errors.WithStack(zim.ErrNotFound) } return entry, nil } func (b *ZimBundle) renderDirectory(ctx context.Context, filename string) (io.ReadCloser, os.FileInfo, error) { zimFile := &zimFile{ fileInfo: &zimFileInfo{ isDir: true, modTime: time.Time{}, mode: 0, name: filename, size: 0, }, reader: io.NopCloser(bytes.NewBuffer(nil)), } return zimFile, zimFile.fileInfo, nil } func (b *ZimBundle) renderMainPage(ctx context.Context, filename string) (io.ReadCloser, os.FileInfo, error) { if err := b.init(); err != nil { return nil, nil, errors.WithStack(err) } main, err := b.reader.MainPage() if err != nil { return nil, nil, errors.WithStack(err) } return b.renderURL(ctx, main.FullURL()) } func (b *ZimBundle) injectEdgeScriptTag(data []byte) ([]byte, error) { buff := bytes.NewBuffer(data) doc, err := html.Parse(buff) if err != nil { return nil, errors.WithStack(err) } var f func(*html.Node) bool f = func(n *html.Node) bool { if n.Type == html.ElementNode && n.Data == "head" { script := &html.Node{ Type: html.ElementNode, Data: "script", Attr: []html.Attribute{ { Key: "src", Val: "/edge/sdk/client.js", }, }, } n.AppendChild(script) return false } for c := n.FirstChild; c != nil; c = c.NextSibling { if keepWalking := f(c); !keepWalking { return false } } return true } f(doc) buff.Reset() if err := html.Render(buff, doc); err != nil { return nil, errors.WithStack(err) } return buff.Bytes(), nil } func (b *ZimBundle) init() error { b.initOnce.Do(func() { reader, err := zim.Open(b.archivePath) if err != nil { b.initErr = errors.Wrapf(err, "could not open '%v'", b.archivePath) return } b.reader = reader cache, err := lru.New[string, zim.Namespace](128) if err != nil { b.initErr = errors.Wrap(err, "could not initialize cache") return } b.urlNamespaceCache = cache }) if b.initErr != nil { return errors.WithStack(b.initErr) } return nil } func NewZimBundle(archivePath string) *ZimBundle { return &ZimBundle{ archivePath: archivePath, } } type zimFile struct { fileInfo *zimFileInfo reader io.ReadCloser } // Close implements fs.File. func (f *zimFile) Close() error { if err := f.reader.Close(); err != nil { return errors.WithStack(err) } return nil } // Read implements fs.File. func (f *zimFile) Read(d []byte) (int, error) { n, err := f.reader.Read(d) if err != nil { if errors.Is(err, io.EOF) { return n, err } return n, errors.WithStack(err) } return n, nil } // Stat implements fs.File. func (f *zimFile) Stat() (fs.FileInfo, error) { return f.fileInfo, nil } var _ fs.File = &zimFile{} type zimFileInfo struct { isDir bool modTime time.Time mode fs.FileMode name string size int64 } // IsDir implements fs.FileInfo. func (i *zimFileInfo) IsDir() bool { return i.isDir } // ModTime implements fs.FileInfo. func (i *zimFileInfo) ModTime() time.Time { return i.modTime } // Mode implements fs.FileInfo. func (i *zimFileInfo) Mode() fs.FileMode { return i.mode } // Name implements fs.FileInfo. func (i *zimFileInfo) Name() string { return i.name } // Size implements fs.FileInfo. func (i *zimFileInfo) Size() int64 { return i.size } // Sys implements fs.FileInfo. func (*zimFileInfo) Sys() any { return nil } var _ fs.FileInfo = &zimFileInfo{}