package zim

import (
	"bytes"
	"encoding/binary"
	"io"
	"log"

	"github.com/pkg/errors"
)

type zimCompression uint8

const (
	zimCompressionNoneZeno      zimCompression = 0
	zimCompressionNone          zimCompression = 1
	zimCompressionNoneZLib      zimCompression = 2
	zimCompressionNoneBZip2     zimCompression = 3
	zimCompressionNoneXZ        zimCompression = 4
	zimCompressionNoneZStandard zimCompression = 5
)

// ContentEntry is a directory entry that points at a blob inside a cluster.
type ContentEntry struct {
	*BaseEntry
	mimeType     string
	clusterIndex uint32
	blobIndex    uint32
}

// Reader returns an io.Reader for the entry's content. Cluster decompression
// is not implemented yet, so the returned reader is currently empty.
func (e *ContentEntry) Reader() (io.Reader, error) {
	data := make([]byte, 8)

	// Resolve the cluster's start offset from the cluster pointer list.
	startClusterPtrOffset := e.reader.clusterPtrPos + (uint64(e.clusterIndex) * 8)

	if err := e.reader.readRange(int64(startClusterPtrOffset), data); err != nil {
		return nil, errors.WithStack(err)
	}

	startClusterOffset, err := readUint64(data, binary.LittleEndian)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	// The next pointer marks where the cluster ends.
	endClusterPtrOffset := e.reader.clusterPtrPos + (uint64(e.clusterIndex+1) * 8)

	if err := e.reader.readRange(int64(endClusterPtrOffset), data); err != nil {
		return nil, errors.WithStack(err)
	}

	endClusterOffset, err := readUint64(data, binary.LittleEndian)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	// The first byte of the cluster is the cluster information byte.
	data = make([]byte, 1)

	if err := e.reader.readRange(int64(startClusterOffset), data); err != nil {
		return nil, errors.WithStack(err)
	}

	clusterHeader := data[0]
	// Low 4 bits: compression algorithm.
	compression := (clusterHeader << 4) >> 4
	// Bit 4: extended cluster (64-bit blob offsets).
	extended := (clusterHeader<<3)>>7 == 1

	log.Printf("%08b %v %04b %d %d %d", clusterHeader, extended, compression, compression, startClusterOffset, endClusterOffset)

	switch compression {
	case uint8(zimCompressionNoneZeno):
		fallthrough
	case uint8(zimCompressionNone):
	case uint8(zimCompressionNoneXZ):
	case uint8(zimCompressionNoneZStandard):
	case uint8(zimCompressionNoneZLib):
		fallthrough
	case uint8(zimCompressionNoneBZip2):
		fallthrough
	default:
		// return nil, errors.Wrapf(ErrCompressionAlgorithmNotSupported, "unexpected compression algorithm '%d'", compression)
	}

	var internal []byte
	buff := bytes.NewBuffer(internal)

	// blob starts at offset, blob ends at offset
	// var bs, be uint32
	//
	// LZMA: 4, Zstandard: 5
	// if compression == 4 || compression == 5 {
	// 	var blob []byte
	// 	var ok bool
	// 	var dec io.ReadCloser
	// 	if blob, ok = blobLookup(); !ok {
	// 		b, err := a.z.bytesRangeAt(start+1, end+1)
	// 		if err != nil {
	// 			return nil, err
	// 		}
	// 		bbuf := bytes.NewBuffer(b)
	// 		switch compression {
	// 		case 5:
	// 			dec, err = NewZstdReader(bbuf)
	// 		case 4:
	// 			dec, err = NewXZReader(bbuf)
	// 		}
	// 		if err != nil {
	// 			return nil, err
	// 		}
	// 		defer dec.Close()
	// 		// the decoded chunks are around 1MB
	// 		b, err = ioutil.ReadAll(dec)
	// 		if err != nil {
	// 			return nil, err
	// 		}
	// 		blob = make([]byte, len(b))
	// 		copy(blob, b)
	// 		// TODO: 2 requests for the same blob could occur at the same time
	// 		bcache.Add(a.cluster, blob)
	// 	} else {
	// 		bi, ok := bcache.Get(a.cluster)
	// 		if !ok {
	// 			return nil, errors.New("not in cache anymore")
	// 		}
	// 		blob = bi.([]byte)
	// 	}
	// 	bs, err = readInt32(blob[a.blob*4:a.blob*4+4], nil)
	// 	if err != nil {
	// 		return nil, err
	// 	}
	// 	be, err = readInt32(blob[a.blob*4+4:a.blob*4+4+4], nil)
	// 	if err != nil {
	// 		return nil, err
	// 	}
	// 	// avoid retaining the whole chunk
	// 	c := make([]byte, be-bs)
	// 	copy(c, blob[bs:be])
	// 	return c, nil
	// } else if compression == 0 || compression == 1 {
	// 	// uncompressed
	// 	startPos := start + 1
	// 	blobOffset := uint64(a.blob * 4)
	// 	bs, err := readInt32(a.z.bytesRangeAt(startPos+blobOffset, startPos+blobOffset+4))
	// 	if err != nil {
	// 		return nil, err
	// 	}
	// 	be, err := readInt32(a.z.bytesRangeAt(startPos+blobOffset+4, startPos+blobOffset+4+4))
	// 	if err != nil {
	// 		return nil, err
	// 	}
	// 	return a.z.bytesRangeAt(startPos+uint64(bs), startPos+uint64(be))
	// }

	return buff, nil
}
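// extractUncompressedBlob is a minimal, currently unused sketch of the blob
// lookup that the commented-out reference code in Reader above performs for
// uncompressed clusters: the cluster body (everything after the one-byte
// cluster information byte) starts with a table of little-endian uint32
// offsets, and blob n spans [offsets[n], offsets[n+1]) relative to the start
// of that table. The name and signature are illustrative, and the sketch
// assumes a non-extended cluster; extended clusters use 64-bit offsets.
func extractUncompressedBlob(clusterBody []byte, blobIndex uint32) ([]byte, error) {
	offPos := int(blobIndex) * 4
	if offPos+8 > len(clusterBody) {
		return nil, errors.Errorf("blob index '%d' out of range", blobIndex)
	}

	blobStart := binary.LittleEndian.Uint32(clusterBody[offPos : offPos+4])
	blobEnd := binary.LittleEndian.Uint32(clusterBody[offPos+4 : offPos+8])

	if blobStart > blobEnd || uint64(blobEnd) > uint64(len(clusterBody)) {
		return nil, errors.Errorf("invalid blob boundaries '%d'-'%d'", blobStart, blobEnd)
	}

	// Copy the blob so the caller does not keep the whole cluster alive.
	blob := make([]byte, blobEnd-blobStart)
	copy(blob, clusterBody[blobStart:blobEnd])

	return blob, nil
}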
// Redirect returns the entry itself: a content entry is not a redirect, so
// there is nothing to resolve.
func (e *ContentEntry) Redirect() (*ContentEntry, error) {
	return e, nil
}

func (r *Reader) parseContentEntry(offset int64, base *BaseEntry) (*ContentEntry, error) {
	entry := &ContentEntry{
		BaseEntry: base,
	}

	// Bytes 0-1: index into the MIME type list.
	data := make([]byte, 2)

	if err := r.readRange(offset, data); err != nil {
		return nil, errors.WithStack(err)
	}

	mimeTypeIndex, err := readUint16(data, binary.LittleEndian)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	if mimeTypeIndex >= uint16(len(r.mimeTypes)) {
		return nil, errors.Errorf("mime type index '%d' out of range ('%d' mime types)", mimeTypeIndex, len(r.mimeTypes))
	}

	entry.mimeType = r.mimeTypes[mimeTypeIndex]

	// Byte 3: namespace.
	data = make([]byte, 1)

	if err := r.readRange(offset+3, data); err != nil {
		return nil, errors.WithStack(err)
	}

	entry.namespace = Namespace(data[0])

	// Bytes 8-11: cluster number.
	data = make([]byte, 4)

	if err := r.readRange(offset+8, data); err != nil {
		return nil, errors.WithStack(err)
	}

	clusterIndex, err := readUint32(data, binary.LittleEndian)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	entry.clusterIndex = clusterIndex

	// Bytes 12-15: blob number inside the cluster.
	if err := r.readRange(offset+12, data); err != nil {
		return nil, errors.WithStack(err)
	}

	blobIndex, err := readUint32(data, binary.LittleEndian)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	entry.blobIndex = blobIndex

	// From byte 16: zero-terminated URL, then zero-terminated title.
	url, read, err := r.readStringAt(offset + 16)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	entry.url = url

	title, _, err := r.readStringAt(offset + 16 + read)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	entry.title = title

	return entry, nil
}
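// The switch on the compression algorithm in Reader above currently accepts
// XZ and Zstandard clusters without decompressing them. One possible way to
// wire this up, sketched below, is to wrap the raw cluster body in a decoder
// before reading the blob offset table. The decoder packages named here
// (github.com/ulikunitz/xz and github.com/klauspost/compress/zstd) are
// assumptions rather than dependencies of this module, which is why the
// sketch stays commented out; zlib and bzip2 clusters could be handled the
// same way with the standard library's compress/zlib and compress/bzip2.
//
//	func newClusterDecoder(compression uint8, body io.Reader) (io.Reader, error) {
//		switch compression {
//		case uint8(zimCompressionNoneZeno), uint8(zimCompressionNone):
//			// Not compressed: read the cluster body as-is.
//			return body, nil
//		case uint8(zimCompressionNoneXZ):
//			return xz.NewReader(body)
//		case uint8(zimCompressionNoneZStandard):
//			dec, err := zstd.NewReader(body)
//			if err != nil {
//				return nil, err
//			}
//			return dec.IOReadCloser(), nil
//		default:
//			return nil, errors.Errorf("unsupported compression algorithm '%d'", compression)
//		}
//	}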