Further optimize JSON parsing and editing performance

This commit is contained in:
Vikram Rangnekar 2019-05-12 01:36:52 -04:00
parent 1e78491cb2
commit 6c9accb628
5 changed files with 153 additions and 100 deletions

View File

@ -1,21 +1,18 @@
package json package ajson
import ( import (
"bytes" "bytes"
"crypto/sha1"
"github.com/cespare/xxhash/v2"
) )
func Filter(w *bytes.Buffer, b []byte, keys []string) error { func Filter(w *bytes.Buffer, b []byte, keys []string) error {
state := expectKey
var err error var err error
kmap := make(map[[20]byte]struct{}, len(keys)) kmap := make(map[uint64]struct{}, len(keys))
for _, k := range keys { for i := range keys {
h := sha1.Sum([]byte(k)) kmap[xxhash.Sum64String(keys[i])] = struct{}{}
if _, ok := kmap[h]; !ok {
kmap[h] = struct{}{}
}
} }
// is an list // is an list
@ -29,7 +26,8 @@ func Filter(w *bytes.Buffer, b []byte, keys []string) error {
s, e, d := 0, 0, 0 s, e, d := 0, 0, 0
kf := false var k []byte
state := expectKey
for i := 0; i < len(b); i++ { for i := 0; i < len(b); i++ {
if state == expectObjClose || state == expectListClose { if state == expectObjClose || state == expectListClose {
@ -67,8 +65,7 @@ func Filter(w *bytes.Buffer, b []byte, keys []string) error {
} }
case state == expectKeyClose && b[i] == '"': case state == expectKeyClose && b[i] == '"':
state = expectColon state = expectColon
k := b[(s + 1):i] k = b[(s + 1):i]
_, kf = kmap[sha1.Sum(k)]
case state == expectColon && b[i] == ':': case state == expectColon && b[i] == ':':
state = expectValue state = expectValue
@ -115,7 +112,7 @@ func Filter(w *bytes.Buffer, b []byte, keys []string) error {
cb := b[s:(e + 1)] cb := b[s:(e + 1)]
e = 0 e = 0
if !kf { if _, ok := kmap[xxhash.Sum64(k)]; !ok {
continue continue
} }

View File

@ -1,7 +1,7 @@
package json package ajson
import ( import (
"crypto/sha1" "github.com/cespare/xxhash/v2"
) )
const ( const (
@ -21,27 +21,35 @@ type Field struct {
Value []byte Value []byte
} }
// Value returns the scalar payload of a raw JSON value.
//
// A double-quoted string is returned without its surrounding quotes, an
// object ({...}) or list ([...]) yields nil, and anything else is returned
// unchanged. Inputs shorter than two bytes cannot hold a pair of delimiters
// and are returned as-is, so empty or truncated input never panics.
func Value(b []byte) []byte {
	// Guard before indexing: b[0] on empty input and b[1:0] on a lone
	// quote character would both panic.
	if len(b) < 2 {
		return b
	}

	e := len(b) - 1

	switch {
	case b[0] == '"' && b[e] == '"':
		return b[1:e]
	case b[0] == '[' && b[e] == ']':
		return nil
	case b[0] == '{' && b[e] == '}':
		return nil
	default:
		return b
	}
}
func Get(b []byte, keys [][]byte) []Field { func Get(b []byte, keys [][]byte) []Field {
s := 0 kmap := make(map[uint64]struct{}, len(keys))
state := expectKey
kmap := make(map[[20]byte]struct{}, len(keys)) for i := range keys {
kmap[xxhash.Sum64(keys[i])] = struct{}{}
for _, k := range keys {
h := sha1.Sum(k)
if _, ok := kmap[h]; !ok {
kmap[h] = struct{}{}
}
} }
prealloc := 20 res := make([]Field, 20)
res := make([]Field, prealloc)
s, e, d := 0, 0, 0 s, e, d := 0, 0, 0
var kf bool
var k []byte var k []byte
state := expectKey
n := 0
for i := 0; i < len(b); i++ { for i := 0; i < len(b); i++ {
if state == expectObjClose || state == expectListClose { if state == expectObjClose || state == expectListClose {
switch b[i] { switch b[i] {
@ -60,7 +68,6 @@ func Get(b []byte, keys [][]byte) []Field {
case state == expectKeyClose && b[i] == '"': case state == expectKeyClose && b[i] == '"':
state = expectColon state = expectColon
k = b[(s + 1):i] k = b[(s + 1):i]
_, kf = kmap[sha1.Sum(k)]
case state == expectColon && b[i] == ':': case state == expectColon && b[i] == ':':
state = expectValue state = expectValue
@ -110,13 +117,11 @@ func Get(b []byte, keys [][]byte) []Field {
} }
if e != 0 { if e != 0 {
if kf { _, ok := kmap[xxhash.Sum64(k)]
if len(res) == cap(res) {
r := make([]Field, 0, (len(res) * 2)) if ok {
copy(r, res) res[n] = Field{k, b[s:(e + 1)]}
res = r n++
}
res = append(res, Field{k, b[s:(e + 1)]})
} }
state = expectKey state = expectKey
@ -124,5 +129,5 @@ func Get(b []byte, keys [][]byte) []Field {
} }
} }
return res return res[:n]
} }

View File

@ -1,4 +1,4 @@
package json package ajson
import ( import (
"bytes" "bytes"
@ -130,7 +130,7 @@ var (
}` }`
input4 = ` input4 = `
[{ { "users" : [{
"id": 1, "id": 1,
"full_name": "Sidney Stroman", "full_name": "Sidney Stroman",
"email": "user0@demo.com", "email": "user0@demo.com",
@ -148,7 +148,7 @@ var (
"full_name": "Jerry Dickinson", "full_name": "Jerry Dickinson",
"email": "user1@demo.com", "email": "user1@demo.com",
"__twitter_id": [{ "name": "hello" }, { "name": "world"}] "__twitter_id": [{ "name": "hello" }, { "name": "world"}]
}]` }] }`
) )
func TestGet(t *testing.T) { func TestGet(t *testing.T) {
@ -194,6 +194,28 @@ func TestGet(t *testing.T) {
} }
} }
// TestValue checks Value against each kind of raw JSON value: a number is
// passed through unchanged, a quoted string loses its quotes, and objects
// and lists produce nil.
func TestValue(t *testing.T) {
	number := []byte("12345")
	if got := Value(number); !bytes.Equal(got, number) {
		t.Fatal("Number value invalid")
	}

	quoted := []byte(`"12345"`)
	unquoted := []byte(`12345`)
	if got := Value(quoted); !bytes.Equal(got, unquoted) {
		t.Fatal("String value invalid")
	}

	object := []byte(`{ "hello": "world" }`)
	if got := Value(object); got != nil {
		t.Fatal("Object value is not nil", got)
	}

	list := []byte(`[ "hello", "world" ]`)
	if got := Value(list); got != nil {
		t.Fatal("List value is not nil")
	}
}
func TestFilter(t *testing.T) { func TestFilter(t *testing.T) {
var b bytes.Buffer var b bytes.Buffer
Filter(&b, []byte(input2), []string{"id", "full_name", "embed"}) Filter(&b, []byte(input2), []string{"id", "full_name", "embed"})
@ -206,13 +228,22 @@ func TestFilter(t *testing.T) {
} }
func TestStrip(t *testing.T) { func TestStrip(t *testing.T) {
value := Strip([]byte(input3), []string{"data", "users"}) path1 := [][]byte{[]byte("data"), []byte("users")}
value1 := Strip([]byte(input3), path1)
expected := []byte(`[{"id":1,"embed":{"id":8}},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13}]`) expected := []byte(`[{"id":1,"embed":{"id":8}},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13}]`)
if bytes.Equal(value, expected) == false { if bytes.Equal(value1, expected) == false {
t.Log(value) t.Log(value1)
t.Error("Does not match expected json") t.Error("[Valid path] Does not match expected json")
}
path2 := [][]byte{[]byte("boo"), []byte("hoo")}
value2 := Strip([]byte(input3), path2)
if bytes.Equal(value2, []byte(input3)) == false {
t.Log(value2)
t.Error("[Invalid path] Does not match expected json")
} }
} }
@ -229,7 +260,7 @@ func TestReplace(t *testing.T) {
{[]byte("some_list"), []byte(`[{"id":1,"embed":{"id":8}},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13}]`)}, {[]byte("some_list"), []byte(`[{"id":1,"embed":{"id":8}},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13}]`)},
} }
expected := `[{ expected := `{ "users" : [{
"id": 1, "id": 1,
"full_name": "Sidney Stroman", "full_name": "Sidney Stroman",
"email": "user0@demo.com", "email": "user0@demo.com",
@ -247,7 +278,7 @@ func TestReplace(t *testing.T) {
"full_name": "Jerry Dickinson", "full_name": "Jerry Dickinson",
"email": "user1@demo.com", "email": "user1@demo.com",
"__twitter_id":"1234567890" "__twitter_id":"1234567890"
}]` }] }`
err := Replace(&buf, []byte(input4), from, to) err := Replace(&buf, []byte(input4), from, to)
if err != nil { if err != nil {
@ -255,6 +286,23 @@ func TestReplace(t *testing.T) {
} }
if buf.String() != expected { if buf.String() != expected {
t.Log(buf.String())
t.Error("Does not match expected json")
}
}
func TestReplaceEmpty(t *testing.T) {
var buf bytes.Buffer
json := `{ "users" : [{"id":1,"full_name":"Sidney Stroman","email":"user0@demo.com","__users_twitter_id":"2048666903444506956"}, {"id":2,"full_name":"Jerry Dickinson","email":"user1@demo.com","__users_twitter_id":"2048666903444506956"}, {"id":3,"full_name":"Kenna Cassin","email":"user2@demo.com","__users_twitter_id":"2048666903444506956"}, {"id":4,"full_name":"Mr. Pat Parisian","email":"rodney@kautzer.biz","__users_twitter_id":"2048666903444506956"}, {"id":5,"full_name":"Bette Ebert","email":"janeenrath@goyette.com","__users_twitter_id":"2048666903444506956"}, {"id":6,"full_name":"Everett Kiehn","email":"michael@bartoletti.com","__users_twitter_id":"2048666903444506956"}, {"id":7,"full_name":"Katrina Cronin","email":"loretaklocko@framivolkman.org","__users_twitter_id":"2048666903444506956"}, {"id":8,"full_name":"Caroll Orn Sr.","email":"joannarau@hegmann.io","__users_twitter_id":"2048666903444506956"}, {"id":9,"full_name":"Gwendolyn Ziemann","email":"renaytoy@rutherford.co","__users_twitter_id":"2048666903444506956"}, {"id":10,"full_name":"Mrs. Rosann Fritsch","email":"holliemosciski@thiel.org","__users_twitter_id":"2048666903444506956"}, {"id":11,"full_name":"Arden Koss","email":"cristobalankunding@howewelch.org","__users_twitter_id":"2048666903444506956"}, {"id":12,"full_name":"Brenton Bauch PhD","email":"renee@miller.co","__users_twitter_id":"2048666903444506956"}, {"id":13,"full_name":"Daine Gleichner","email":"andrea@nienow.co","__users_twitter_id":"2048666903444506956"}] }`
err := Replace(&buf, []byte(json), []Field{}, []Field{})
if err != nil {
t.Fatal(err)
}
if buf.String() != json {
t.Log(buf.String())
t.Error("Does not match expected json") t.Error("Does not match expected json")
} }
} }
@ -284,10 +332,11 @@ func BenchmarkFilter(b *testing.B) {
} }
func BenchmarkStrip(b *testing.B) { func BenchmarkStrip(b *testing.B) {
path := [][]byte{[]byte("data"), []byte("users")}
b.ReportAllocs() b.ReportAllocs()
for n := 0; n < b.N; n++ { for n := 0; n < b.N; n++ {
Strip([]byte(input3), []string{"data", "users"}) Strip([]byte(input3), path)
} }
} }

View File

@ -1,9 +1,10 @@
package json package ajson
import ( import (
"bytes" "bytes"
"crypto/sha1"
"errors" "errors"
"github.com/cespare/xxhash/v2"
) )
func Replace(w *bytes.Buffer, b []byte, from, to []Field) error { func Replace(w *bytes.Buffer, b []byte, from, to []Field) error {
@ -11,31 +12,25 @@ func Replace(w *bytes.Buffer, b []byte, from, to []Field) error {
return errors.New("'from' and 'to' must be of the same length") return errors.New("'from' and 'to' must be of the same length")
} }
fmap := make(map[[20]byte]int, (len(from) * 2)) h := xxhash.New()
tmap := make(map[[20]byte]int, (len(from))) tmap := make(map[uint64]int, len(from))
for i, f := range from { for i, f := range from {
h1 := sha1.Sum(f.Key) h.Write(f.Key)
n, ok := fmap[h1] h.Write(f.Value)
if !ok {
fmap[h1] = i
n = i
}
h2 := sha1.Sum(f.Value) tmap[h.Sum64()] = i
fmap[h2] = n h.Reset()
tmap[h2] = i
} }
state := expectKey
ws, we := 0, len(b)
s, e, d := 0, 0, 0 s, e, d := 0, 0, 0
fi := -1
state := expectKey
ws, we := -1, len(b)
for i := 0; i < len(b); i++ { for i := 0; i < len(b); i++ {
// skip any left padding whitespace // skip any left padding whitespace
if ws == 0 && (b[i] == '{' || b[i] == '[') { if ws == -1 && (b[i] == '{' || b[i] == '[') {
ws = i ws = i
} }
@ -55,11 +50,8 @@ func Replace(w *bytes.Buffer, b []byte, from, to []Field) error {
case state == expectKeyClose && b[i] == '"': case state == expectKeyClose && b[i] == '"':
state = expectColon state = expectColon
h1 := sha1.Sum(b[(s + 1):i]) h.Write(b[(s + 1):i])
if n, ok := fmap[h1]; ok { we = s
we = s
fi = n
}
case state == expectColon && b[i] == ':': case state == expectColon && b[i] == ':':
state = expectValue state = expectValue
@ -109,43 +101,58 @@ func Replace(w *bytes.Buffer, b []byte, from, to []Field) error {
if e != 0 { if e != 0 {
e++ e++
h2 := sha1.Sum(b[s:e]) h.Write(b[s:e])
replace := false n, ok := tmap[h.Sum64()]
h.Reset()
if n, ok1 := fmap[h2]; ok1 && n == fi { if ok {
ti, ok2 := tmap[h2] if _, err := w.Write(b[ws:(we + 1)]); err != nil {
return err
}
if ok2 { if len(to[n].Key) != 0 {
if _, err := w.Write(b[ws:(we + 1)]); err != nil { var err error
return err
} if _, err := w.Write(to[n].Key); err != nil {
if _, err := w.Write(to[ti].Key); err != nil {
return err return err
} }
if _, err := w.WriteString(`":`); err != nil { if _, err := w.WriteString(`":`); err != nil {
return err return err
} }
if _, err := w.Write(to[ti].Value); err != nil { if len(to[n].Value) != 0 {
_, err = w.Write(to[n].Value)
} else {
_, err = w.WriteString("null")
}
if err != nil {
return err return err
} }
replace = true
ws = e
} else if b[e] == ',' {
ws = e + 1
} else {
ws = e ws = e
} }
} }
if !replace && (b[s] == '[' || b[s] == '{') { if !ok && (b[s] == '[' || b[s] == '{') {
// the i++ in the for loop will add 1 so we account for that (s - 1) // the i++ in the for loop will add 1 so we account for that (s - 1)
i = s - 1 i = s - 1
} }
state = expectKey state = expectKey
we = len(b) we = len(b)
fi = -1
e = 0 e = 0
d = 0 d = 0
} }
} }
w.Write(b[ws:we]) if ws == -1 || (ws == 0 && we == len(b)) {
w.Write(b)
} else {
w.Write(b[ws:we])
}
return nil return nil
} }

View File

@ -1,21 +1,16 @@
package json package ajson
import ( import (
"bytes" "bytes"
) )
func Strip(b []byte, path []string) []byte { func Strip(b []byte, path [][]byte) []byte {
s := 0
state := expectKey
kb := make([][]byte, 0, len(path))
for _, k := range path {
kb = append(kb, []byte(k))
}
s, e, d := 0, 0, 0 s, e, d := 0, 0, 0
ki := 0
ob := b
pi := 0
pm := false pm := false
state := expectKey
for i := 0; i < len(b); i++ { for i := 0; i < len(b); i++ {
if state == expectObjClose || state == expectListClose { if state == expectObjClose || state == expectListClose {
@ -34,12 +29,12 @@ func Strip(b []byte, path []string) []byte {
case state == expectKeyClose && b[i] == '"': case state == expectKeyClose && b[i] == '"':
state = expectColon state = expectColon
if ki == len(kb) { if pi == len(path) {
ki = 0 pi = 0
} }
pm = bytes.Equal(b[(s+1):i], kb[ki]) pm = bytes.Equal(b[(s+1):i], path[pi])
if pm { if pm {
ki++ pi++
} }
case state == expectColon && b[i] == ':': case state == expectColon && b[i] == ':':
@ -92,7 +87,7 @@ func Strip(b []byte, path []string) []byte {
b = b[s:(e + 1)] b = b[s:(e + 1)]
i = 0 i = 0
if ki == len(kb) { if pi == len(path) {
return b return b
} }
} }
@ -102,5 +97,5 @@ func Strip(b []byte, path []string) []byte {
} }
} }
return nil return ob
} }