Further optimize json parsing and editing performance
This commit is contained in:
parent
1e78491cb2
commit
6c9accb628
|
@ -1,21 +1,18 @@
|
|||
package json
|
||||
package ajson
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha1"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
func Filter(w *bytes.Buffer, b []byte, keys []string) error {
|
||||
state := expectKey
|
||||
var err error
|
||||
|
||||
kmap := make(map[[20]byte]struct{}, len(keys))
|
||||
kmap := make(map[uint64]struct{}, len(keys))
|
||||
|
||||
for _, k := range keys {
|
||||
h := sha1.Sum([]byte(k))
|
||||
if _, ok := kmap[h]; !ok {
|
||||
kmap[h] = struct{}{}
|
||||
}
|
||||
for i := range keys {
|
||||
kmap[xxhash.Sum64String(keys[i])] = struct{}{}
|
||||
}
|
||||
|
||||
// is an list
|
||||
|
@ -29,7 +26,8 @@ func Filter(w *bytes.Buffer, b []byte, keys []string) error {
|
|||
|
||||
s, e, d := 0, 0, 0
|
||||
|
||||
kf := false
|
||||
var k []byte
|
||||
state := expectKey
|
||||
|
||||
for i := 0; i < len(b); i++ {
|
||||
if state == expectObjClose || state == expectListClose {
|
||||
|
@ -67,8 +65,7 @@ func Filter(w *bytes.Buffer, b []byte, keys []string) error {
|
|||
}
|
||||
case state == expectKeyClose && b[i] == '"':
|
||||
state = expectColon
|
||||
k := b[(s + 1):i]
|
||||
_, kf = kmap[sha1.Sum(k)]
|
||||
k = b[(s + 1):i]
|
||||
|
||||
case state == expectColon && b[i] == ':':
|
||||
state = expectValue
|
||||
|
@ -115,7 +112,7 @@ func Filter(w *bytes.Buffer, b []byte, keys []string) error {
|
|||
cb := b[s:(e + 1)]
|
||||
e = 0
|
||||
|
||||
if !kf {
|
||||
if _, ok := kmap[xxhash.Sum64(k)]; !ok {
|
||||
continue
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
package json
|
||||
package ajson
|
||||
|
||||
import (
|
||||
"crypto/sha1"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -21,27 +21,35 @@ type Field struct {
|
|||
Value []byte
|
||||
}
|
||||
|
||||
// Value returns the scalar payload of a raw JSON value.
//
// A value wrapped in double quotes is returned without the quotes, a value
// wrapped in [] or {} yields nil, and anything else is returned unchanged.
// Inputs shorter than two bytes cannot be wrapped in a delimiter pair, so they
// are returned as-is; this also keeps empty input and a lone `"` from
// indexing or slicing out of range.
func Value(b []byte) []byte {
	if len(b) < 2 {
		return b
	}

	e := len(b) - 1

	switch {
	case b[0] == '"' && b[e] == '"':
		// Drop the surrounding quotes; `""` yields an empty, non-nil slice.
		return b[1:e]
	case b[0] == '[' && b[e] == ']':
		return nil
	case b[0] == '{' && b[e] == '}':
		return nil
	default:
		return b
	}
}
|
||||
|
||||
func Get(b []byte, keys [][]byte) []Field {
|
||||
s := 0
|
||||
state := expectKey
|
||||
kmap := make(map[uint64]struct{}, len(keys))
|
||||
|
||||
kmap := make(map[[20]byte]struct{}, len(keys))
|
||||
|
||||
for _, k := range keys {
|
||||
h := sha1.Sum(k)
|
||||
if _, ok := kmap[h]; !ok {
|
||||
kmap[h] = struct{}{}
|
||||
}
|
||||
for i := range keys {
|
||||
kmap[xxhash.Sum64(keys[i])] = struct{}{}
|
||||
}
|
||||
|
||||
prealloc := 20
|
||||
res := make([]Field, prealloc)
|
||||
res := make([]Field, 20)
|
||||
|
||||
s, e, d := 0, 0, 0
|
||||
|
||||
var kf bool
|
||||
var k []byte
|
||||
state := expectKey
|
||||
|
||||
n := 0
|
||||
for i := 0; i < len(b); i++ {
|
||||
if state == expectObjClose || state == expectListClose {
|
||||
switch b[i] {
|
||||
|
@ -60,7 +68,6 @@ func Get(b []byte, keys [][]byte) []Field {
|
|||
case state == expectKeyClose && b[i] == '"':
|
||||
state = expectColon
|
||||
k = b[(s + 1):i]
|
||||
_, kf = kmap[sha1.Sum(k)]
|
||||
|
||||
case state == expectColon && b[i] == ':':
|
||||
state = expectValue
|
||||
|
@ -110,13 +117,11 @@ func Get(b []byte, keys [][]byte) []Field {
|
|||
}
|
||||
|
||||
if e != 0 {
|
||||
if kf {
|
||||
if len(res) == cap(res) {
|
||||
r := make([]Field, 0, (len(res) * 2))
|
||||
copy(r, res)
|
||||
res = r
|
||||
}
|
||||
res = append(res, Field{k, b[s:(e + 1)]})
|
||||
_, ok := kmap[xxhash.Sum64(k)]
|
||||
|
||||
if ok {
|
||||
res[n] = Field{k, b[s:(e + 1)]}
|
||||
n++
|
||||
}
|
||||
|
||||
state = expectKey
|
||||
|
@ -124,5 +129,5 @@ func Get(b []byte, keys [][]byte) []Field {
|
|||
}
|
||||
}
|
||||
|
||||
return res
|
||||
return res[:n]
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
package json
|
||||
package ajson
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
@ -130,7 +130,7 @@ var (
|
|||
}`
|
||||
|
||||
input4 = `
|
||||
[{
|
||||
{ "users" : [{
|
||||
"id": 1,
|
||||
"full_name": "Sidney Stroman",
|
||||
"email": "user0@demo.com",
|
||||
|
@ -148,7 +148,7 @@ var (
|
|||
"full_name": "Jerry Dickinson",
|
||||
"email": "user1@demo.com",
|
||||
"__twitter_id": [{ "name": "hello" }, { "name": "world"}]
|
||||
}]`
|
||||
}] }`
|
||||
)
|
||||
|
||||
func TestGet(t *testing.T) {
|
||||
|
@ -194,6 +194,28 @@ func TestGet(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestValue(t *testing.T) {
|
||||
v1 := []byte("12345")
|
||||
if !bytes.Equal(Value(v1), v1) {
|
||||
t.Fatal("Number value invalid")
|
||||
}
|
||||
|
||||
v2 := []byte(`"12345"`)
|
||||
if !bytes.Equal(Value(v2), []byte(`12345`)) {
|
||||
t.Fatal("String value invalid")
|
||||
}
|
||||
|
||||
v3 := []byte(`{ "hello": "world" }`)
|
||||
if Value(v3) != nil {
|
||||
t.Fatal("Object value is not nil", Value(v3))
|
||||
}
|
||||
|
||||
v4 := []byte(`[ "hello", "world" ]`)
|
||||
if Value(v4) != nil {
|
||||
t.Fatal("List value is not nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilter(t *testing.T) {
|
||||
var b bytes.Buffer
|
||||
Filter(&b, []byte(input2), []string{"id", "full_name", "embed"})
|
||||
|
@ -206,13 +228,22 @@ func TestFilter(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestStrip(t *testing.T) {
|
||||
value := Strip([]byte(input3), []string{"data", "users"})
|
||||
path1 := [][]byte{[]byte("data"), []byte("users")}
|
||||
value1 := Strip([]byte(input3), path1)
|
||||
|
||||
expected := []byte(`[{"id":1,"embed":{"id":8}},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13}]`)
|
||||
|
||||
if bytes.Equal(value, expected) == false {
|
||||
t.Log(value)
|
||||
t.Error("Does not match expected json")
|
||||
if bytes.Equal(value1, expected) == false {
|
||||
t.Log(value1)
|
||||
t.Error("[Valid path] Does not match expected json")
|
||||
}
|
||||
|
||||
path2 := [][]byte{[]byte("boo"), []byte("hoo")}
|
||||
value2 := Strip([]byte(input3), path2)
|
||||
|
||||
if bytes.Equal(value2, []byte(input3)) == false {
|
||||
t.Log(value2)
|
||||
t.Error("[Invalid path] Does not match expected json")
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -229,7 +260,7 @@ func TestReplace(t *testing.T) {
|
|||
{[]byte("some_list"), []byte(`[{"id":1,"embed":{"id":8}},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13}]`)},
|
||||
}
|
||||
|
||||
expected := `[{
|
||||
expected := `{ "users" : [{
|
||||
"id": 1,
|
||||
"full_name": "Sidney Stroman",
|
||||
"email": "user0@demo.com",
|
||||
|
@ -247,7 +278,7 @@ func TestReplace(t *testing.T) {
|
|||
"full_name": "Jerry Dickinson",
|
||||
"email": "user1@demo.com",
|
||||
"__twitter_id":"1234567890"
|
||||
}]`
|
||||
}] }`
|
||||
|
||||
err := Replace(&buf, []byte(input4), from, to)
|
||||
if err != nil {
|
||||
|
@ -255,6 +286,23 @@ func TestReplace(t *testing.T) {
|
|||
}
|
||||
|
||||
if buf.String() != expected {
|
||||
t.Log(buf.String())
|
||||
t.Error("Does not match expected json")
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceEmpty(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
|
||||
json := `{ "users" : [{"id":1,"full_name":"Sidney Stroman","email":"user0@demo.com","__users_twitter_id":"2048666903444506956"}, {"id":2,"full_name":"Jerry Dickinson","email":"user1@demo.com","__users_twitter_id":"2048666903444506956"}, {"id":3,"full_name":"Kenna Cassin","email":"user2@demo.com","__users_twitter_id":"2048666903444506956"}, {"id":4,"full_name":"Mr. Pat Parisian","email":"rodney@kautzer.biz","__users_twitter_id":"2048666903444506956"}, {"id":5,"full_name":"Bette Ebert","email":"janeenrath@goyette.com","__users_twitter_id":"2048666903444506956"}, {"id":6,"full_name":"Everett Kiehn","email":"michael@bartoletti.com","__users_twitter_id":"2048666903444506956"}, {"id":7,"full_name":"Katrina Cronin","email":"loretaklocko@framivolkman.org","__users_twitter_id":"2048666903444506956"}, {"id":8,"full_name":"Caroll Orn Sr.","email":"joannarau@hegmann.io","__users_twitter_id":"2048666903444506956"}, {"id":9,"full_name":"Gwendolyn Ziemann","email":"renaytoy@rutherford.co","__users_twitter_id":"2048666903444506956"}, {"id":10,"full_name":"Mrs. Rosann Fritsch","email":"holliemosciski@thiel.org","__users_twitter_id":"2048666903444506956"}, {"id":11,"full_name":"Arden Koss","email":"cristobalankunding@howewelch.org","__users_twitter_id":"2048666903444506956"}, {"id":12,"full_name":"Brenton Bauch PhD","email":"renee@miller.co","__users_twitter_id":"2048666903444506956"}, {"id":13,"full_name":"Daine Gleichner","email":"andrea@nienow.co","__users_twitter_id":"2048666903444506956"}] }`
|
||||
|
||||
err := Replace(&buf, []byte(json), []Field{}, []Field{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if buf.String() != json {
|
||||
t.Log(buf.String())
|
||||
t.Error("Does not match expected json")
|
||||
}
|
||||
}
|
||||
|
@ -284,10 +332,11 @@ func BenchmarkFilter(b *testing.B) {
|
|||
}
|
||||
|
||||
func BenchmarkStrip(b *testing.B) {
|
||||
path := [][]byte{[]byte("data"), []byte("users")}
|
||||
b.ReportAllocs()
|
||||
|
||||
for n := 0; n < b.N; n++ {
|
||||
Strip([]byte(input3), []string{"data", "users"})
|
||||
Strip([]byte(input3), path)
|
||||
}
|
||||
}
|
||||
|
|
@ -1,9 +1,10 @@
|
|||
package json
|
||||
package ajson
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha1"
|
||||
"errors"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
func Replace(w *bytes.Buffer, b []byte, from, to []Field) error {
|
||||
|
@ -11,31 +12,25 @@ func Replace(w *bytes.Buffer, b []byte, from, to []Field) error {
|
|||
return errors.New("'from' and 'to' must be of the same length")
|
||||
}
|
||||
|
||||
fmap := make(map[[20]byte]int, (len(from) * 2))
|
||||
tmap := make(map[[20]byte]int, (len(from)))
|
||||
h := xxhash.New()
|
||||
tmap := make(map[uint64]int, len(from))
|
||||
|
||||
for i, f := range from {
|
||||
h1 := sha1.Sum(f.Key)
|
||||
n, ok := fmap[h1]
|
||||
if !ok {
|
||||
fmap[h1] = i
|
||||
n = i
|
||||
}
|
||||
h.Write(f.Key)
|
||||
h.Write(f.Value)
|
||||
|
||||
h2 := sha1.Sum(f.Value)
|
||||
fmap[h2] = n
|
||||
tmap[h2] = i
|
||||
tmap[h.Sum64()] = i
|
||||
h.Reset()
|
||||
}
|
||||
|
||||
state := expectKey
|
||||
ws, we := 0, len(b)
|
||||
|
||||
s, e, d := 0, 0, 0
|
||||
fi := -1
|
||||
|
||||
state := expectKey
|
||||
ws, we := -1, len(b)
|
||||
|
||||
for i := 0; i < len(b); i++ {
|
||||
// skip any left padding whitespace
|
||||
if ws == 0 && (b[i] == '{' || b[i] == '[') {
|
||||
if ws == -1 && (b[i] == '{' || b[i] == '[') {
|
||||
ws = i
|
||||
}
|
||||
|
||||
|
@ -55,11 +50,8 @@ func Replace(w *bytes.Buffer, b []byte, from, to []Field) error {
|
|||
|
||||
case state == expectKeyClose && b[i] == '"':
|
||||
state = expectColon
|
||||
h1 := sha1.Sum(b[(s + 1):i])
|
||||
if n, ok := fmap[h1]; ok {
|
||||
we = s
|
||||
fi = n
|
||||
}
|
||||
h.Write(b[(s + 1):i])
|
||||
we = s
|
||||
|
||||
case state == expectColon && b[i] == ':':
|
||||
state = expectValue
|
||||
|
@ -109,43 +101,58 @@ func Replace(w *bytes.Buffer, b []byte, from, to []Field) error {
|
|||
if e != 0 {
|
||||
e++
|
||||
|
||||
h2 := sha1.Sum(b[s:e])
|
||||
replace := false
|
||||
h.Write(b[s:e])
|
||||
n, ok := tmap[h.Sum64()]
|
||||
h.Reset()
|
||||
|
||||
if n, ok1 := fmap[h2]; ok1 && n == fi {
|
||||
ti, ok2 := tmap[h2]
|
||||
if ok {
|
||||
if _, err := w.Write(b[ws:(we + 1)]); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if ok2 {
|
||||
if _, err := w.Write(b[ws:(we + 1)]); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.Write(to[ti].Key); err != nil {
|
||||
if len(to[n].Key) != 0 {
|
||||
var err error
|
||||
|
||||
if _, err := w.Write(to[n].Key); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(`":`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.Write(to[ti].Value); err != nil {
|
||||
if len(to[n].Value) != 0 {
|
||||
_, err = w.Write(to[n].Value)
|
||||
} else {
|
||||
_, err = w.WriteString("null")
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
replace = true
|
||||
|
||||
ws = e
|
||||
} else if b[e] == ',' {
|
||||
ws = e + 1
|
||||
} else {
|
||||
ws = e
|
||||
}
|
||||
}
|
||||
|
||||
if !replace && (b[s] == '[' || b[s] == '{') {
|
||||
if !ok && (b[s] == '[' || b[s] == '{') {
|
||||
// the i++ in the for loop will add 1 so we account for that (s - 1)
|
||||
i = s - 1
|
||||
}
|
||||
|
||||
state = expectKey
|
||||
we = len(b)
|
||||
fi = -1
|
||||
e = 0
|
||||
d = 0
|
||||
}
|
||||
}
|
||||
|
||||
w.Write(b[ws:we])
|
||||
if ws == -1 || (ws == 0 && we == len(b)) {
|
||||
w.Write(b)
|
||||
} else {
|
||||
w.Write(b[ws:we])
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -1,21 +1,16 @@
|
|||
package json
|
||||
package ajson
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
)
|
||||
|
||||
func Strip(b []byte, path []string) []byte {
|
||||
s := 0
|
||||
state := expectKey
|
||||
|
||||
kb := make([][]byte, 0, len(path))
|
||||
for _, k := range path {
|
||||
kb = append(kb, []byte(k))
|
||||
}
|
||||
|
||||
func Strip(b []byte, path [][]byte) []byte {
|
||||
s, e, d := 0, 0, 0
|
||||
ki := 0
|
||||
|
||||
ob := b
|
||||
pi := 0
|
||||
pm := false
|
||||
state := expectKey
|
||||
|
||||
for i := 0; i < len(b); i++ {
|
||||
if state == expectObjClose || state == expectListClose {
|
||||
|
@ -34,12 +29,12 @@ func Strip(b []byte, path []string) []byte {
|
|||
|
||||
case state == expectKeyClose && b[i] == '"':
|
||||
state = expectColon
|
||||
if ki == len(kb) {
|
||||
ki = 0
|
||||
if pi == len(path) {
|
||||
pi = 0
|
||||
}
|
||||
pm = bytes.Equal(b[(s+1):i], kb[ki])
|
||||
pm = bytes.Equal(b[(s+1):i], path[pi])
|
||||
if pm {
|
||||
ki++
|
||||
pi++
|
||||
}
|
||||
|
||||
case state == expectColon && b[i] == ':':
|
||||
|
@ -92,7 +87,7 @@ func Strip(b []byte, path []string) []byte {
|
|||
b = b[s:(e + 1)]
|
||||
i = 0
|
||||
|
||||
if ki == len(kb) {
|
||||
if pi == len(path) {
|
||||
return b
|
||||
}
|
||||
}
|
||||
|
@ -102,5 +97,5 @@ func Strip(b []byte, path []string) []byte {
|
|||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
return ob
|
||||
}
|
Loading…
Reference in New Issue