diff --git a/go.mod b/go.mod index 19c0759..600d040 100644 --- a/go.mod +++ b/go.mod @@ -1,10 +1,13 @@ module forge.cadoles.com/arcad/edge -go 1.19 +go 1.20 require ( + github.com/hashicorp/golang-lru/v2 v2.0.3 github.com/hashicorp/mdns v1.0.5 + github.com/klauspost/compress v1.16.6 github.com/lestrrat-go/jwx/v2 v2.0.8 + github.com/ulikunitz/xz v0.5.11 modernc.org/sqlite v1.20.4 ) @@ -59,12 +62,12 @@ require ( github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect gitlab.com/wpetit/goweb v0.0.0-20230419082146-a94d9ed7202b go.opencensus.io v0.22.5 // indirect - golang.org/x/crypto v0.7.0 + golang.org/x/crypto v0.10.0 golang.org/x/mod v0.10.0 - golang.org/x/net v0.9.0 // indirect - golang.org/x/sys v0.7.0 // indirect - golang.org/x/term v0.7.0 // indirect - golang.org/x/text v0.9.0 // indirect + golang.org/x/net v0.11.0 + golang.org/x/sys v0.9.0 // indirect + golang.org/x/term v0.9.0 // indirect + golang.org/x/text v0.10.0 // indirect golang.org/x/tools v0.8.0 // indirect golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect gopkg.in/yaml.v2 v2.4.0 diff --git a/go.sum b/go.sum index e1a0564..c816e87 100644 --- a/go.sum +++ b/go.sum @@ -188,6 +188,8 @@ github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/ad github.com/hashicorp/go.net v0.0.0-20151006203346-104dcad90073/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru/v2 v2.0.3 h1:kmRrRLlInXvng0SmLxmQpQkpbYAvcXm7NPDrgxJa9mE= +github.com/hashicorp/golang-lru/v2 v2.0.3/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/mdns v0.0.0-20151206042412-9d85cf22f9f8/go.mod h1:aa76Av3qgPeIQp9Y3qIkTBPieQYNkQ13Kxe7pze9Wb0= 
github.com/hashicorp/mdns v1.0.5 h1:1M5hW1cunYeoXOqHwEb/GBDDHAFo0Yqb/uz/beC6LbE= @@ -202,6 +204,8 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.16.6 h1:91SKEy4K37vkp255cJ8QesJhjyRO0hn9i9G0GoUwLsk= +github.com/klauspost/compress v1.16.6/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= @@ -277,6 +281,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8= +github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli/v2 v2.24.3 h1:7Q1w8VN8yE0MJEHP06bv89PjYsN4IHWED2s1v/Zlfm0= github.com/urfave/cli/v2 v2.24.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc= @@ -306,8 +312,8 @@ golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod 
h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220427172511-eb4f295cb31f/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.7.0 h1:AvwMYaRytfdeVt3u6mLaxYtErKYjxA2OXjJ1HHq6t3A= -golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= +golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM= +golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -379,8 +385,8 @@ golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.4.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= -golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= -golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= +golang.org/x/net v0.11.0 h1:Gi2tvZIJyBtO9SDr1q9h5hEQCp/4L2RQ+ar0qjx2oNU= +golang.org/x/net v0.11.0/go.mod h1:2L/ixqYpgIVXmeoSA/4Lu7BzTG4KIyPIryS4IsOd1oQ= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -442,13 +448,13 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= -golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= +golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA= -golang.org/x/term v0.7.0 h1:BEvjmm5fURWqcfbSKTdpkDXYBrUS1c0m8agp14W48vQ= -golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= +golang.org/x/term v0.9.0 h1:GRRCnKYhdQrD8kfRAdQ6Zcw1P0OcELxGLKJvtjVMZ28= +golang.org/x/term v0.9.0/go.mod h1:M6DEAAIenWoTxdKrOltXcmDY3rSplQUkrvaDU5FcQyo= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -458,8 +464,8 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.10.0 
h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58= +golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= diff --git a/pkg/bundle/from_path.go b/pkg/bundle/from_path.go index 89448fd..2b2f6ed 100644 --- a/pkg/bundle/from_path.go +++ b/pkg/bundle/from_path.go @@ -13,6 +13,7 @@ type ArchiveExt string const ( ExtZip ArchiveExt = "zip" ExtTarGz ArchiveExt = "tar.gz" + ExtZim ArchiveExt = "zim" ) func FromPath(path string) (Bundle, error) { @@ -56,5 +57,14 @@ func matchArchivePattern(archivePath string) (Bundle, error) { return NewZipBundle(archivePath), nil } + matches, err = filepath.Match(fmt.Sprintf("*.%s", ExtZim), base) + if err != nil { + return nil, errors.Wrapf(err, "could not match file archive '%s'", archivePath) + } + + if matches { + return NewZimBundle(archivePath), nil + } + return nil, errors.WithStack(ErrUnknownBundleArchiveExt) } diff --git a/pkg/bundle/zim/article.go b/pkg/bundle/zim/article.go new file mode 100644 index 0000000..303665a --- /dev/null +++ b/pkg/bundle/zim/article.go @@ -0,0 +1,283 @@ +package zim + +import ( + "bytes" + "errors" + "fmt" + "io" + "io/ioutil" + "strings" + "sync" + + lru "github.com/hashicorp/golang-lru/v2" +) + +const ( + RedirectEntry uint16 = 0xffff + LinkTargetEntry = 0xfffe + DeletedEntry = 0xfffd +) + +var articlePool sync.Pool + +// the recent uncompressed blobs, mainly useful while indexing and asking +// for the same blob again and again +var bcache *lru.Cache[any, any] + +type Article struct { + // EntryType is a RedirectEntry/LinkTargetEntry/DeletedEntry or an idx + // pointing to ZimReader.mimeTypeList + EntryType uint16 + Title string + URLPtr uint64 + Namespace 
byte + url string + blob uint32 + cluster uint32 + z *ZimReader +} + +// convenient method to return the Article at URL index idx +func (z *ZimReader) ArticleAtURLIdx(idx uint32) (*Article, error) { + o, err := z.OffsetAtURLIdx(idx) + if err != nil { + return nil, err + } + return z.ArticleAt(o) +} + +// return the article main page if it exists +func (z *ZimReader) MainPage() (*Article, error) { + if z.mainPage == 0xffffffff { + return nil, nil + } + return z.ArticleAtURLIdx(z.mainPage) +} + +// get the article (Directory) pointed by the offset found in URLpos or Titlepos +func (z *ZimReader) ArticleAt(offset uint64) (*Article, error) { + a := articlePool.Get().(*Article) + err := z.FillArticleAt(a, offset) + return a, err +} + +// Fill an article with datas found at offset +func (z *ZimReader) FillArticleAt(a *Article, offset uint64) error { + a.z = z + a.URLPtr = offset + + mimeIdx, err := readInt16(z.bytesRangeAt(offset, offset+2)) + if err != nil { + return fmt.Errorf("can't read article %w", err) + } + a.EntryType = mimeIdx + + // Linktarget or Target Entry + if mimeIdx == LinkTargetEntry || mimeIdx == DeletedEntry { + // TODO + return nil + } + + s, err := z.bytesRangeAt(offset+3, offset+4) + if err != nil { + return err + } + a.Namespace = s[0] + + a.cluster, err = readInt32(z.bytesRangeAt(offset+8, offset+8+4)) + if err != nil { + return err + } + a.blob, err = readInt32(z.bytesRangeAt(offset+12, offset+12+4)) + if err != nil { + return err + } + + // Redirect + if mimeIdx == RedirectEntry { + // assume the url + title won't be longer than 2k + b, err := z.bytesRangeAt(offset+12, offset+12+2048) + if err != nil { + return nil + } + bbuf := bytes.NewBuffer(b) + a.url, err = bbuf.ReadString('\x00') + if err != nil { + return err + } + a.url = strings.TrimRight(a.url, "\x00") + + a.Title, err = bbuf.ReadString('\x00') + if err != nil { + return err + } + a.Title = strings.TrimRight(a.Title, "\x00") + return err + } + + b, err := z.bytesRangeAt(offset+16, 
offset+16+2048) + if err != nil { + return nil + } + bbuf := bytes.NewBuffer(b) + a.url, err = bbuf.ReadString('\x00') + if err != nil { + return err + } + + a.url = strings.TrimRight(string(a.url), "\x00") + + title, err := bbuf.ReadString('\x00') + if err != nil { + return err + } + title = strings.TrimRight(string(title), "\x00") + // This is a trick to force a copy and avoid retain of the full buffer + // mainly for indexing title reasons + if len(title) != 0 { + a.Title = title[0:1] + title[1:] + } + return nil +} + +// return the uncompressed data associated with this article +func (a *Article) Data() ([]byte, error) { + // ensure we have data to read + if a.EntryType == RedirectEntry || a.EntryType == LinkTargetEntry || a.EntryType == DeletedEntry { + return nil, nil + } + start, end, err := a.z.clusterOffsetsAtIdx(a.cluster) + if err != nil { + return nil, err + } + s, err := a.z.bytesRangeAt(start, start+1) + if err != nil { + return nil, err + } + compression := uint8(s[0]) + + // blob starts at offset, blob ends at offset + var bs, be uint32 + + // LZMA: 4, Zstandard: 5 + if compression == 4 || compression == 5 { + blobLookup := func() ([]byte, bool) { + if v, ok := bcache.Get(a.cluster); ok { + b := v.([]byte) + return b, ok + } + return nil, false + } + + var blob []byte + var ok bool + var dec io.ReadCloser + if blob, ok = blobLookup(); !ok { + b, err := a.z.bytesRangeAt(start+1, end+1) + if err != nil { + return nil, err + } + bbuf := bytes.NewBuffer(b) + switch compression { + case 5: + dec, err = NewZstdReader(bbuf) + + case 4: + dec, err = NewXZReader(bbuf) + } + if err != nil { + return nil, err + } + defer dec.Close() + // the decoded chunk are around 1MB + b, err = ioutil.ReadAll(dec) + if err != nil { + return nil, err + } + blob = make([]byte, len(b)) + copy(blob, b) + // TODO: 2 requests for the same blob could occure at the same time + bcache.Add(a.cluster, blob) + } else { + bi, ok := bcache.Get(a.cluster) + if !ok { + return nil, 
errors.New("not in cache anymore") + } + blob = bi.([]byte) + } + + bs, err = readInt32(blob[a.blob*4:a.blob*4+4], nil) + if err != nil { + return nil, err + } + be, err = readInt32(blob[a.blob*4+4:a.blob*4+4+4], nil) + if err != nil { + return nil, err + } + + // avoid retaining all the chunk + c := make([]byte, be-bs) + copy(c, blob[bs:be]) + return c, nil + + } else if compression == 0 || compression == 1 { + // uncompresssed + startPos := start + 1 + blobOffset := uint64(a.blob * 4) + + bs, err := readInt32(a.z.bytesRangeAt(startPos+blobOffset, startPos+blobOffset+4)) + if err != nil { + return nil, err + } + + be, err := readInt32(a.z.bytesRangeAt(startPos+blobOffset+4, startPos+blobOffset+4+4)) + if err != nil { + return nil, err + } + + return a.z.bytesRangeAt(startPos+uint64(bs), startPos+uint64(be)) + } + + return nil, errors.New("Unhandled compression") +} + +func (a *Article) MimeType() string { + if a.EntryType == RedirectEntry || a.EntryType == LinkTargetEntry || a.EntryType == DeletedEntry { + return "" + } + + return a.z.mimeTypeList[a.EntryType] +} + +// return the url prefixed by the namespace +func (a *Article) FullURL() string { + return string(a.Namespace) + "/" + a.url +} + +func (a *Article) String() string { + return fmt.Sprintf("Mime: 0x%x URL: [%s], Title: [%s], Cluster: 0x%x Blob: 0x%x", + a.EntryType, a.FullURL(), a.Title, a.cluster, a.blob) +} + +// RedirectIndex return the redirect index of RedirectEntry type article +// return an err if not a redirect entry +func (a *Article) RedirectIndex() (uint32, error) { + if a.EntryType != RedirectEntry { + return 0, errors.New("Not a RedirectEntry") + } + // We use the cluster to save the redirect index position for RedirectEntry type + return a.cluster, nil +} + +func (a *Article) blobOffsetsAtIdx(z *ZimReader) (start, end uint64) { + idx := a.blob + offset := z.clusterPtrPos + uint64(idx)*8 + start, err := readInt64(z.bytesRangeAt(offset, offset+8)) + if err != nil { + return + } + offset = 
z.clusterPtrPos + uint64(idx+1)*8 + end, _ = readInt64(z.bytesRangeAt(offset, offset+8)) + + return +} diff --git a/pkg/bundle/zim/error.go b/pkg/bundle/zim/error.go new file mode 100644 index 0000000..236a681 --- /dev/null +++ b/pkg/bundle/zim/error.go @@ -0,0 +1,5 @@ +package zim + +import "errors" + +var ErrNotFound = errors.New("not found") diff --git a/pkg/bundle/zim/favicon.go b/pkg/bundle/zim/favicon.go new file mode 100644 index 0000000..10ed048 --- /dev/null +++ b/pkg/bundle/zim/favicon.go @@ -0,0 +1,49 @@ +package zim + +import "github.com/pkg/errors" + +func (z *ZimReader) Favicon() (*Article, error) { + illustration, err := z.getMetadataIllustration() + if err != nil && !errors.Is(err, ErrNotFound) { + return nil, errors.WithStack(err) + } + + if illustration != nil { + return illustration, nil + } + + namespaces := []string{"-", "I"} + entryNames := []string{"favicon", "favicon.png"} + + for _, ns := range namespaces { + for _, en := range entryNames { + article, err := z.GetPageNoIndex(ns + "/" + en) + if err != nil && !errors.Is(err, ErrNotFound) { + return nil, errors.WithStack(err) + } + + if article != nil { + return article, nil + } + } + } + + return nil, errors.WithStack(ErrNotFound) +} + +func (z *ZimReader) getMetadataIllustration() (*Article, error) { + metadata, err := z.Metadata(MetadataIllustration96x96at2, MetadataIllustration48x48at1) + if err != nil { + return nil, errors.WithStack(err) + } + + if _, exists := metadata[MetadataIllustration96x96at2]; exists { + return z.GetPageNoIndex("M/" + string(MetadataIllustration96x96at2)) + } + + if _, exists := metadata[MetadataIllustration48x48at1]; exists { + return z.GetPageNoIndex("M/" + string(MetadataIllustration48x48at1)) + } + + return nil, errors.WithStack(ErrNotFound) +} diff --git a/pkg/bundle/zim/metadata.go b/pkg/bundle/zim/metadata.go new file mode 100644 index 0000000..469e89e --- /dev/null +++ b/pkg/bundle/zim/metadata.go @@ -0,0 +1,69 @@ +package zim + +import ( + 
"github.com/pkg/errors" +) + +type MetadataKey string + +// See https://wiki.openzim.org/wiki/Metadata +const ( + MetadataName MetadataKey = "Name" + MetadataTitle MetadataKey = "Title" + MetadataDescription MetadataKey = "Description" + MetadataLongDescription MetadataKey = "LongDescription" + MetadataCreator MetadataKey = "Creator" + MetadataTags MetadataKey = "Tags" + MetadataDate MetadataKey = "Date" + MetadataPublisher MetadataKey = "Publisher" + MetadataFlavour MetadataKey = "Flavour" + MetadataSource MetadataKey = "Source" + MetadataLanguage MetadataKey = "Language" + MetadataIllustration48x48at1 MetadataKey = "Illustration_48x48@1" + MetadataIllustration96x96at2 MetadataKey = "Illustration_96x96@2" +) + +var knownKeys = []MetadataKey{ + MetadataName, + MetadataTitle, + MetadataDescription, + MetadataLongDescription, + MetadataCreator, + MetadataPublisher, + MetadataLanguage, + MetadataTags, + MetadataDate, + MetadataFlavour, + MetadataSource, + MetadataIllustration48x48at1, + MetadataIllustration96x96at2, +} + +// Metadata returns a copy of the internal metadata map of the ZIM file. 
// readInt64 decodes a little endian uint64 from the first 8 bytes of b.
//
// The (b, err) parameters mirror the results of bytesRangeAt so that a call
// can be nested directly: a non-nil err is returned unchanged and b is
// ignored. A slice that is too short yields the error of binary.Read.
func readInt64(b []byte, err error) (v uint64, aerr error) {
	if err != nil {
		return 0, err
	}

	aerr = binary.Read(bytes.NewReader(b), binary.LittleEndian, &v)

	return v, aerr
}

// readInt32 decodes a little endian uint32 from the first 4 bytes of b.
// A non-nil err is returned unchanged, see readInt64.
func readInt32(b []byte, err error) (v uint32, aerr error) {
	if err != nil {
		return 0, err
	}

	aerr = binary.Read(bytes.NewReader(b), binary.LittleEndian, &v)

	return v, aerr
}

// readInt16 decodes a little endian uint16 from the first 2 bytes of b.
// A non-nil err is returned unchanged, see readInt64.
func readInt16(b []byte, err error) (v uint16, aerr error) {
	if err != nil {
		return 0, err
	}

	aerr = binary.Read(bytes.NewReader(b), binary.LittleEndian, &v)

	return v, aerr
}
+} + +func NewXZReader(r io.Reader) (*XZReader, error) { + dec, err := xz.NewReader(r) + if err != nil { + return nil, err + } + return &XZReader{dec}, nil +} + +func (xr *XZReader) Close() error { + return nil +} diff --git a/pkg/bundle/zim/zim.go b/pkg/bundle/zim/zim.go new file mode 100644 index 0000000..8d51510 --- /dev/null +++ b/pkg/bundle/zim/zim.go @@ -0,0 +1,317 @@ +package zim + +import ( + "bytes" + "fmt" + "io" + "os" + "strings" + "sync" + + lru "github.com/hashicorp/golang-lru/v2" + "github.com/pkg/errors" +) + +const ( + zimHeader = 72173914 +) + +// ZimReader keep tracks of everything related to ZIM reading +type ZimReader struct { + f *os.File + UUID uint32 + ArticleCount uint32 + clusterCount uint32 + urlPtrPos uint64 + titlePtrPos uint64 + clusterPtrPos uint64 + mimeListPos uint64 + mainPage uint32 + layoutPage uint32 + mimeTypeList []string +} + +// create a new zim reader +func NewReader(path string) (*ZimReader, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + z := ZimReader{f: f, mainPage: 0xffffffff, layoutPage: 0xffffffff} + + articlePool = sync.Pool{ + New: func() interface{} { + return new(Article) + }, + } + // keep 4 latest uncompressed blobs, around 1M per blob + bcache, _ = lru.New[any, any](5) + + err = z.readFileHeaders() + return &z, err +} + +// Return an ordered list of mime types present in the ZIM file +func (z *ZimReader) MimeTypes() []string { + if len(z.mimeTypeList) != 0 { + return z.mimeTypeList + } + + var s []string + // assume mime list fit in 2k + b, err := z.bytesRangeAt(z.mimeListPos, z.mimeListPos+2048) + if err != nil { + return s + } + bbuf := bytes.NewBuffer(b) + + for { + line, err := bbuf.ReadBytes('\x00') + if err != nil && err != io.EOF { + return s + } + // a line of 1 is a line containing only \x00 and it's the marker for the + // end of mime types list + if len(line) == 1 { + break + } + s = append(s, strings.TrimRight(string(line), "\x00")) + } + z.mimeTypeList = s + return s 
+} + +// list all articles, using url index, contained in a zim file +// note that this is a slow implementation, a real iterator is faster +// you are not suppose to use this method on big zim files, use indexes +func (z *ZimReader) ListArticles() <-chan *Article { + ch := make(chan *Article, 10) + + go func() { + var idx uint32 + // starting at 1 to avoid "con" entry + var start uint32 = 1 + + for idx = start; idx < z.ArticleCount; idx++ { + art, err := z.ArticleAtURLIdx(idx) + if err != nil { + continue + } + + if art == nil { + // TODO: deal with redirect continue + } + ch <- art + } + close(ch) + }() + return ch +} + +// list all title pointer, Titles by position contained in a zim file +// Titles are pointers to URLpos index, useful for indexing cause smaller to store: uint32 +// note that this is a slow implementation, a real iterator is faster +// you are not suppose to use this method on big zim files prefer ListTitlesPtrIterator to build your index +func (z *ZimReader) ListTitlesPtr() <-chan uint32 { + ch := make(chan uint32, 10) + + go func() { + var pos uint64 + var count uint32 + + for pos = z.titlePtrPos; count < z.ArticleCount; pos += 4 { + idx, err := readInt32(z.bytesRangeAt(pos, pos+4)) + if err != nil { + continue + } + ch <- idx + count++ + } + close(ch) + }() + return ch +} + +// list all title pointer, Titles by position contained in a zim file +// Titles are pointers to URLpos index, usefull for indexing cause smaller to store: uint32 +func (z *ZimReader) ListTitlesPtrIterator(cb func(uint32)) { + var count uint32 + for pos := z.titlePtrPos; count < z.ArticleCount; pos += 4 { + idx, err := readInt32(z.bytesRangeAt(pos, pos+4)) + if err != nil { + continue + } + cb(idx) + count++ + } +} + +// return the article at the exact url not using any index +func (z *ZimReader) GetPageNoIndex(url string) (*Article, error) { + // starting at 1 to avoid "con" entry + var start uint32 + stop := z.ArticleCount + + a := new(Article) + + for { + pos := (start 
+ stop) / 2 + + offset, err := z.OffsetAtURLIdx(pos) + if err != nil { + return nil, err + } + err = z.FillArticleAt(a, offset) + if err != nil { + return nil, err + } + + if a.FullURL() == url { + return a, nil + } + + if a.FullURL() > url { + stop = pos + } else { + start = pos + } + if stop-start == 1 { + break + } + + } + return nil, errors.WithStack(ErrNotFound) +} + +// get the offset pointing to Article at pos in the URL idx +func (z *ZimReader) OffsetAtURLIdx(idx uint32) (uint64, error) { + offset := z.urlPtrPos + uint64(idx)*8 + return readInt64(z.bytesRangeAt(offset, offset+8)) +} + +// Close & cleanup the zimreader +func (z *ZimReader) Close() error { + return z.f.Close() +} + +func (z *ZimReader) String() string { + fi, err := z.f.Stat() + if err != nil { + return "corrupted zim" + } + return fmt.Sprintf("Size: %d, ArticleCount: %d urlPtrPos: 0x%x titlePtrPos: 0x%x mimeListPos: 0x%x clusterPtrPos: 0x%x\nMimeTypes: %v", + fi.Size(), z.ArticleCount, z.urlPtrPos, z.titlePtrPos, z.mimeListPos, z.clusterPtrPos, z.MimeTypes()) +} + +// getBytesRangeAt returns bytes from start to end +// it's needed to abstract mmap usages rather than read directly on the mmap slices +func (z *ZimReader) bytesRangeAt(start, end uint64) ([]byte, error) { + buf := make([]byte, end-start) + n, err := z.f.ReadAt(buf, int64(start)) + if err != nil { + return nil, fmt.Errorf("can't read bytes %w", err) + } + + if n != int(end-start) { + return nil, errors.New("can't read enough bytes") + } + + return buf, nil +} + +// populate the ZimReader structs with headers +func (z *ZimReader) readFileHeaders() error { + // checking for file type + v, err := readInt32(z.bytesRangeAt(0, 0+4)) + if err != nil || v != zimHeader { + return errors.New("not a ZIM file") + } + + // checking for version + v, err = readInt32(z.bytesRangeAt(4, 4+4)) + if err != nil { + return errors.Wrap(err, "could not read file version") + } + + // checking for articles count + v, err = readInt32(z.bytesRangeAt(8, 16)) 
+ if err != nil { + return err + } + z.UUID = v + + // checking for articles count + v, err = readInt32(z.bytesRangeAt(24, 24+4)) + if err != nil { + return err + } + z.ArticleCount = v + + // checking for cluster count + v, err = readInt32(z.bytesRangeAt(28, 28+4)) + if err != nil { + return err + } + z.clusterCount = v + + // checking for urlPtrPos + vb, err := readInt64(z.bytesRangeAt(32, 32+8)) + if err != nil { + return err + } + z.urlPtrPos = vb + + // checking for titlePtrPos + vb, err = readInt64(z.bytesRangeAt(40, 40+8)) + if err != nil { + return err + } + z.titlePtrPos = vb + + // checking for clusterPtrPos + vb, err = readInt64(z.bytesRangeAt(48, 48+8)) + if err != nil { + return err + } + z.clusterPtrPos = vb + + // checking for mimeListPos + vb, err = readInt64(z.bytesRangeAt(56, 56+8)) + if err != nil { + return err + } + z.mimeListPos = vb + + // checking for mainPage + v, err = readInt32(z.bytesRangeAt(64, 64+4)) + if err != nil { + return err + } + z.mainPage = v + + // checking for layoutPage + v, err = readInt32(z.bytesRangeAt(68, 68+4)) + if err != nil { + return err + } + z.layoutPage = v + + z.MimeTypes() + return nil +} + +// return start and end offsets for cluster at index idx +func (z *ZimReader) clusterOffsetsAtIdx(idx uint32) (start, end uint64, err error) { + offset := z.clusterPtrPos + (uint64(idx) * 8) + start, err = readInt64(z.bytesRangeAt(offset, offset+8)) + if err != nil { + return + } + offset = z.clusterPtrPos + (uint64(idx+1) * 8) + end, err = readInt64(z.bytesRangeAt(offset, offset+8)) + if err != nil { + return + } + end-- + return +} diff --git a/pkg/bundle/zim/zim_test.go b/pkg/bundle/zim/zim_test.go new file mode 100644 index 0000000..3c85463 --- /dev/null +++ b/pkg/bundle/zim/zim_test.go @@ -0,0 +1,150 @@ +package zim + +import ( + "log" + "testing" + + "github.com/pkg/errors" +) + +var Z *ZimReader + +func init() { + var err error + Z, err = NewReader("testdata/wikibooks_af_all_maxi_2023-06.zim") + if err != nil { + 
log.Panicf("Can't read %v", err) + } +} + +func TestOpen(t *testing.T) { + if Z.ArticleCount == 0 { + t.Errorf("No article found") + } +} + +func TestMime(t *testing.T) { + if len(Z.MimeTypes()) == 0 { + t.Errorf("No mime types found") + } +} + +func TestDisplayInfost(t *testing.T) { + info := Z.String() + if len(info) < 0 { + t.Errorf("Can't read infos") + } + t.Log(info) +} + +func TestURLAtIdx(t *testing.T) { + // addr 0 is a redirect + p, _ := Z.OffsetAtURLIdx(5) + a, _ := Z.ArticleAt(p) + if a == nil { + t.Errorf("Can't find 1st url") + } +} + +func TestDisplayArticle(t *testing.T) { + // addr 0 is a redirect + p, _ := Z.OffsetAtURLIdx(5) + a, _ := Z.ArticleAt(p) + if a == nil { + t.Errorf("Can't find 1st url") + } + + t.Log(a) +} + +func TestPageNoIndex(t *testing.T) { + a, _ := Z.GetPageNoIndex("A/Dracula:Capitol_1.html") + if a == nil { + t.Errorf("Can't find existing url") + } +} + +func TestListArticles(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + var i uint32 + + for a := range Z.ListArticles() { + i++ + t.Log(a.String()) + } + + if i == 0 { + t.Errorf("Can't find any urls") + } + + if i != Z.ArticleCount-1 { + t.Errorf("Can't find the exact ArticleCount urls %d vs %d", i, Z.ArticleCount) + } +} + +func TestMainPage(t *testing.T) { + a, _ := Z.MainPage() + if a == nil { + t.Errorf("Can't find the mainpage article") + } + + t.Log(a) +} + +func TestFavicon(t *testing.T) { + favicon, err := Z.Favicon() + if err != nil { + t.Errorf("%+v", errors.WithStack(err)) + } + if favicon == nil { + t.Errorf("Can't find the favicon article") + } +} + +func TestMetadata(t *testing.T) { + metadata, err := Z.Metadata() + if err != nil { + t.Errorf("%+v", errors.WithStack(err)) + } + if metadata == nil { + t.Errorf("Can't find the metadata") + } +} + +func TestData(t *testing.T) { + // addr 0 is a redirect + p, _ := Z.OffsetAtURLIdx(2) + a, _ := Z.ArticleAt(p) + b, _ := a.Data() + data := string(b) + if a.EntryType != 
RedirectEntry { + if len(data) == 0 { + t.Error("can't read data") + } + } + t.Log(a.String()) + t.Log(data) +} + +func BenchmarkArticleBytes(b *testing.B) { + // addr 0 is a redirect + p, _ := Z.OffsetAtURLIdx(5) + a, _ := Z.ArticleAt(p) + if a == nil { + b.Errorf("Can't find 1st url") + } + data, err := a.Data() + if err != nil { + b.Error(err) + } + + b.SetBytes(int64(len(data))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Data() + bcache.Purge() // prevent memiozing value + } +} diff --git a/pkg/bundle/zim/zstd_reader.go b/pkg/bundle/zim/zstd_reader.go new file mode 100644 index 0000000..284ac34 --- /dev/null +++ b/pkg/bundle/zim/zstd_reader.go @@ -0,0 +1,26 @@ +package zim + +import ( + "fmt" + "io" + + "github.com/klauspost/compress/zstd" +) + +type ZstdReader struct { + *zstd.Decoder +} + +func NewZstdReader(r io.Reader) (*ZstdReader, error) { + dec, err := zstd.NewReader(r) + if err != nil { + return nil, fmt.Errorf("can't read from zstd %w", err) + } + return &ZstdReader{dec}, nil +} + +func (zr *ZstdReader) Close() error { + zr.Decoder.Close() + + return nil +} diff --git a/pkg/bundle/zim_bundle.go b/pkg/bundle/zim_bundle.go new file mode 100644 index 0000000..f47b884 --- /dev/null +++ b/pkg/bundle/zim_bundle.go @@ -0,0 +1,447 @@ +package bundle + +import ( + "bytes" + "context" + "fmt" + "io" + "io/fs" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "golang.org/x/net/html" + + "forge.cadoles.com/arcad/edge/pkg/bundle/zim" + "github.com/pkg/errors" + "gitlab.com/wpetit/goweb/logger" + "gopkg.in/yaml.v2" +) + +type ZimBundle struct { + archivePath string +} + +func (b *ZimBundle) File(filename string) (io.ReadCloser, os.FileInfo, error) { + ctx := logger.With( + context.Background(), + logger.F("filename", filename), + ) + + logger.Debug(ctx, "opening file") + + switch filename { + case "manifest.yml": + return b.renderFakeManifest(ctx) + case "server/main.js": + return b.renderFakeServerMain(ctx) + case "public": + return 
b.renderDirectory(ctx, filename) + case "public/index.html": + return b.redirectToMainPage(ctx, filename) + + default: + return b.renderURL(ctx, filename) + } +} + +func (b *ZimBundle) Dir(dirname string) ([]os.FileInfo, error) { + reader, err := b.openArchive() + if err != nil { + return nil, err + } + + defer func() { + if err := reader.Close(); err != nil { + panic(errors.WithStack(err)) + } + }() + + files := make([]os.FileInfo, 0) + // ctx := context.Background() + + // for _, f := range reader.File { + // if !strings.HasPrefix(f.Name, dirname) { + // continue + // } + + // relPath, err := filepath.Rel(dirname, f.Name) + // if err != nil { + // return nil, errors.Wrap(err, "could not get relative path") + // } + + // logger.Debug( + // ctx, "checking file prefix", + // logger.F("dirname", dirname), + // logger.F("filename", f.Name), + // logger.F("relpath", relPath), + // ) + + // if relPath == filepath.Base(f.Name) { + // files = append(files, f.FileInfo()) + // } + // } + + return files, nil +} + +func (b *ZimBundle) renderFakeManifest(ctx context.Context) (io.ReadCloser, os.FileInfo, error) { + reader, err := b.openArchive() + if err != nil { + return nil, nil, errors.WithStack(err) + } + + defer func() { + if err := reader.Close(); err != nil { + panic(errors.WithStack(err)) + } + }() + + metadata, err := reader.Metadata() + if err != nil { + return nil, nil, errors.WithStack(err) + } + + manifest := map[string]any{} + + manifest["version"] = "0.0.0" + + if name, exists := metadata[zim.MetadataName]; exists { + replacer := strings.NewReplacer( + "_", "", + " ", "", + ) + + manifest["id"] = strings.ToLower(replacer.Replace(name)) + ".zim.edge.app" + } else { + manifest["id"] = strconv.FormatUint(uint64(reader.UUID), 10) + ".zim.edge.app" + } + + if title, exists := metadata[zim.MetadataTitle]; exists { + manifest["title"] = title + } else { + manifest["title"] = "Unknown" + } + + if description, exists := metadata[zim.MetadataDescription]; exists { + 
manifest["description"] = description + } + + favicon, err := reader.Favicon() + if err != nil && !errors.Is(err, zim.ErrNotFound) { + return nil, nil, errors.WithStack(err) + } + + if favicon != nil { + manifestMeta, exists := manifest["metadata"].(map[string]any) + if !exists { + manifestMeta = make(map[string]any) + manifest["metadata"] = manifestMeta + } + + paths, exists := manifestMeta["paths"].(map[string]any) + if !exists { + paths = make(map[string]any) + manifestMeta["paths"] = paths + } + + paths["icon"] = "/" + favicon.FullURL() + } + + data, err := yaml.Marshal(manifest) + if err != nil { + return nil, nil, errors.WithStack(err) + } + + stat := &zimFileInfo{ + isDir: false, + modTime: time.Time{}, + mode: 0, + name: "manifest.yml", + size: int64(len(data)), + } + + buf := bytes.NewBuffer(data) + file := ioutil.NopCloser(buf) + + return file, stat, nil +} + +func (b *ZimBundle) renderFakeServerMain(ctx context.Context) (io.ReadCloser, os.FileInfo, error) { + stat := &zimFileInfo{ + isDir: false, + modTime: time.Time{}, + mode: 0, + name: "server/main.js", + size: 0, + } + + buf := bytes.NewBuffer(nil) + file := ioutil.NopCloser(buf) + + return file, stat, nil +} + +func (b *ZimBundle) renderURL(ctx context.Context, url string) (io.ReadCloser, os.FileInfo, error) { + zr, err := b.openArchive() + if err != nil { + return nil, nil, errors.WithStack(err) + } + + defer func() { + if err := zr.Close(); err != nil { + panic(errors.WithStack(err)) + } + }() + + filename := filepath.Base(url) + url = strings.TrimPrefix(url, "public/") + + article, err := zr.GetPageNoIndex(url) + if err != nil { + if errors.Is(err, zim.ErrNotFound) { + return nil, nil, errors.WithStack(fs.ErrNotExist) + } + + return nil, nil, errors.WithStack(err) + } + + if article.EntryType == zim.RedirectEntry { + redirectIndex, err := article.RedirectIndex() + if err != nil { + return nil, nil, errors.WithStack(err) + } + + ra, err := zr.ArticleAtURLIdx(redirectIndex) + if err != nil { + 
return nil, nil, errors.WithStack(err) + } + + return b.renderRedirect(ctx, filename, ra.FullURL()) + } + + data, err := article.Data() + if err != nil { + return nil, nil, errors.WithStack(err) + } + + mimeType := article.MimeType() + if mimeType == "text/html" { + injected, err := b.injectEdgeScriptTag(data) + if err != nil { + logger.Error(ctx, "could not inject edge script", logger.E(errors.WithStack(err))) + } else { + data = injected + } + } + + zimFile := &zimFile{ + fileInfo: &zimFileInfo{ + isDir: false, + modTime: time.Time{}, + mode: 0, + name: filename, + size: int64(len(data)), + }, + buff: bytes.NewBuffer(data), + } + + return zimFile, zimFile.fileInfo, nil +} + +func (b *ZimBundle) renderDirectory(ctx context.Context, filename string) (io.ReadCloser, os.FileInfo, error) { + zr, err := b.openArchive() + if err != nil { + return nil, nil, errors.WithStack(err) + } + + defer func() { + if err := zr.Close(); err != nil { + panic(errors.WithStack(err)) + } + }() + + zimFile := &zimFile{ + fileInfo: &zimFileInfo{ + isDir: true, + modTime: time.Time{}, + mode: 0, + name: filename, + size: 0, + }, + buff: bytes.NewBuffer(nil), + } + + return zimFile, zimFile.fileInfo, nil +} + +func (b *ZimBundle) renderRedirect(ctx context.Context, filename string, to string) (io.ReadCloser, os.FileInfo, error) { + logger.Debug(ctx, "rendering redirect", logger.F("url", to)) + + data := fmt.Sprintf(` + +
+ + + + `, to) + + stat := &zimFileInfo{ + isDir: false, + modTime: time.Time{}, + mode: 0, + name: filename, + size: int64(len(data)), + } + + buf := bytes.NewBuffer([]byte(data)) + reader := ioutil.NopCloser(buf) + + return reader, stat, nil +} + +func (b *ZimBundle) redirectToMainPage(ctx context.Context, filename string) (io.ReadCloser, os.FileInfo, error) { + zr, err := b.openArchive() + if err != nil { + return nil, nil, errors.WithStack(err) + } + + defer func() { + if err := zr.Close(); err != nil { + panic(errors.WithStack(err)) + } + }() + + main, err := zr.MainPage() + if err != nil { + return nil, nil, errors.WithStack(err) + } + + return b.renderRedirect(ctx, filename, main.FullURL()) +} + +func (b *ZimBundle) injectEdgeScriptTag(data []byte) ([]byte, error) { + buff := bytes.NewBuffer(data) + doc, err := html.Parse(buff) + if err != nil { + return nil, errors.WithStack(err) + } + + var f func(*html.Node) bool + f = func(n *html.Node) bool { + if n.Type == html.ElementNode && n.Data == "head" { + script := &html.Node{ + Type: html.ElementNode, + Data: "script", + Attr: []html.Attribute{ + { + Key: "src", + Val: "/edge/sdk/client.js", + }, + }, + } + + n.AppendChild(script) + + return false + } + + for c := n.FirstChild; c != nil; c = c.NextSibling { + if keepWalking := f(c); !keepWalking { + return false + } + } + + return true + } + + f(doc) + + buff.Reset() + + if err := html.Render(buff, doc); err != nil { + return nil, errors.WithStack(err) + } + + return buff.Bytes(), nil +} + +func (b *ZimBundle) openArchive() (*zim.ZimReader, error) { + zm, err := zim.NewReader(b.archivePath) + if err != nil { + return nil, errors.Wrapf(err, "could not open '%v'", b.archivePath) + } + + return zm, nil +} + +func NewZimBundle(archivePath string) *ZimBundle { + return &ZimBundle{ + archivePath: archivePath, + } +} + +type zimFile struct { + fileInfo *zimFileInfo + buff *bytes.Buffer +} + +// Close implements fs.File. 
+func (f *zimFile) Close() error { + return nil +} + +// Read implements fs.File. +func (f *zimFile) Read(d []byte) (int, error) { + return f.buff.Read(d) +} + +// Stat implements fs.File. +func (f *zimFile) Stat() (fs.FileInfo, error) { + return f.fileInfo, nil +} + +var _ fs.File = &zimFile{} + +type zimFileInfo struct { + isDir bool + modTime time.Time + mode fs.FileMode + name string + size int64 +} + +// IsDir implements fs.FileInfo. +func (i *zimFileInfo) IsDir() bool { + return i.isDir +} + +// ModTime implements fs.FileInfo. +func (i *zimFileInfo) ModTime() time.Time { + return i.modTime +} + +// Mode implements fs.FileInfo. +func (i *zimFileInfo) Mode() fs.FileMode { + return i.mode +} + +// Name implements fs.FileInfo. +func (i *zimFileInfo) Name() string { + return i.name +} + +// Size implements fs.FileInfo. +func (i *zimFileInfo) Size() int64 { + return i.size +} + +// Sys implements fs.FileInfo. +func (*zimFileInfo) Sys() any { + return nil +} + +var _ fs.FileInfo = &zimFileInfo{}