Further optimize JSON parsing and editing performance

This commit is contained in:
Vikram Rangnekar 2019-05-12 01:36:52 -04:00
parent 1e78491cb2
commit 6c9accb628
5 changed files with 153 additions and 100 deletions

View File

@ -1,21 +1,18 @@
package json
package ajson
import (
"bytes"
"crypto/sha1"
"github.com/cespare/xxhash/v2"
)
func Filter(w *bytes.Buffer, b []byte, keys []string) error {
state := expectKey
var err error
kmap := make(map[[20]byte]struct{}, len(keys))
kmap := make(map[uint64]struct{}, len(keys))
for _, k := range keys {
h := sha1.Sum([]byte(k))
if _, ok := kmap[h]; !ok {
kmap[h] = struct{}{}
}
for i := range keys {
kmap[xxhash.Sum64String(keys[i])] = struct{}{}
}
// is a list
@ -29,7 +26,8 @@ func Filter(w *bytes.Buffer, b []byte, keys []string) error {
s, e, d := 0, 0, 0
kf := false
var k []byte
state := expectKey
for i := 0; i < len(b); i++ {
if state == expectObjClose || state == expectListClose {
@ -67,8 +65,7 @@ func Filter(w *bytes.Buffer, b []byte, keys []string) error {
}
case state == expectKeyClose && b[i] == '"':
state = expectColon
k := b[(s + 1):i]
_, kf = kmap[sha1.Sum(k)]
k = b[(s + 1):i]
case state == expectColon && b[i] == ':':
state = expectValue
@ -115,7 +112,7 @@ func Filter(w *bytes.Buffer, b []byte, keys []string) error {
cb := b[s:(e + 1)]
e = 0
if !kf {
if _, ok := kmap[xxhash.Sum64(k)]; !ok {
continue
}

View File

@ -1,7 +1,7 @@
package json
package ajson
import (
"crypto/sha1"
"github.com/cespare/xxhash/v2"
)
const (
@ -21,27 +21,35 @@ type Field struct {
Value []byte
}
// Value returns the scalar payload of a raw JSON value.
//
// A double-quoted string is returned without its surrounding quotes; the
// result is a sub-slice of b, not a copy. A value whose first and last bytes
// are matching list ('[' ... ']') or object ('{' ... '}') delimiters yields
// nil. Any other input is returned unchanged.
//
// Inputs shorter than two bytes cannot be a quoted string or a container, so
// they are returned as-is. This also guards the indexing below, which would
// otherwise panic on empty input or on a lone quote character.
func Value(b []byte) []byte {
	if len(b) < 2 {
		return b
	}

	first, last := b[0], b[len(b)-1]
	switch {
	case first == '"' && last == '"':
		return b[1 : len(b)-1]
	case first == '[' && last == ']', first == '{' && last == '}':
		return nil
	default:
		return b
	}
}
func Get(b []byte, keys [][]byte) []Field {
s := 0
state := expectKey
kmap := make(map[uint64]struct{}, len(keys))
kmap := make(map[[20]byte]struct{}, len(keys))
for _, k := range keys {
h := sha1.Sum(k)
if _, ok := kmap[h]; !ok {
kmap[h] = struct{}{}
}
for i := range keys {
kmap[xxhash.Sum64(keys[i])] = struct{}{}
}
prealloc := 20
res := make([]Field, prealloc)
res := make([]Field, 20)
s, e, d := 0, 0, 0
var kf bool
var k []byte
state := expectKey
n := 0
for i := 0; i < len(b); i++ {
if state == expectObjClose || state == expectListClose {
switch b[i] {
@ -60,7 +68,6 @@ func Get(b []byte, keys [][]byte) []Field {
case state == expectKeyClose && b[i] == '"':
state = expectColon
k = b[(s + 1):i]
_, kf = kmap[sha1.Sum(k)]
case state == expectColon && b[i] == ':':
state = expectValue
@ -110,13 +117,11 @@ func Get(b []byte, keys [][]byte) []Field {
}
if e != 0 {
if kf {
if len(res) == cap(res) {
r := make([]Field, 0, (len(res) * 2))
copy(r, res)
res = r
}
res = append(res, Field{k, b[s:(e + 1)]})
_, ok := kmap[xxhash.Sum64(k)]
if ok {
res[n] = Field{k, b[s:(e + 1)]}
n++
}
state = expectKey
@ -124,5 +129,5 @@ func Get(b []byte, keys [][]byte) []Field {
}
}
return res
return res[:n]
}

View File

@ -1,4 +1,4 @@
package json
package ajson
import (
"bytes"
@ -130,7 +130,7 @@ var (
}`
input4 = `
[{
{ "users" : [{
"id": 1,
"full_name": "Sidney Stroman",
"email": "user0@demo.com",
@ -148,7 +148,7 @@ var (
"full_name": "Jerry Dickinson",
"email": "user1@demo.com",
"__twitter_id": [{ "name": "hello" }, { "name": "world"}]
}]`
}] }`
)
func TestGet(t *testing.T) {
@ -194,6 +194,28 @@ func TestGet(t *testing.T) {
}
}
// TestValue checks that Value returns numbers untouched, strips the quotes
// from strings, and yields nil for objects and lists.
func TestValue(t *testing.T) {
	number := []byte("12345")
	if got := Value(number); !bytes.Equal(got, number) {
		t.Fatal("Number value invalid")
	}

	quoted := []byte(`"12345"`)
	if got := Value(quoted); !bytes.Equal(got, []byte(`12345`)) {
		t.Fatal("String value invalid")
	}

	object := []byte(`{ "hello": "world" }`)
	if got := Value(object); got != nil {
		t.Fatal("Object value is not nil", got)
	}

	list := []byte(`[ "hello", "world" ]`)
	if got := Value(list); got != nil {
		t.Fatal("List value is not nil")
	}
}
func TestFilter(t *testing.T) {
var b bytes.Buffer
Filter(&b, []byte(input2), []string{"id", "full_name", "embed"})
@ -206,13 +228,22 @@ func TestFilter(t *testing.T) {
}
func TestStrip(t *testing.T) {
value := Strip([]byte(input3), []string{"data", "users"})
path1 := [][]byte{[]byte("data"), []byte("users")}
value1 := Strip([]byte(input3), path1)
expected := []byte(`[{"id":1,"embed":{"id":8}},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13}]`)
if bytes.Equal(value, expected) == false {
t.Log(value)
t.Error("Does not match expected json")
if bytes.Equal(value1, expected) == false {
t.Log(value1)
t.Error("[Valid path] Does not match expected json")
}
path2 := [][]byte{[]byte("boo"), []byte("hoo")}
value2 := Strip([]byte(input3), path2)
if bytes.Equal(value2, []byte(input3)) == false {
t.Log(value2)
t.Error("[Invalid path] Does not match expected json")
}
}
@ -229,7 +260,7 @@ func TestReplace(t *testing.T) {
{[]byte("some_list"), []byte(`[{"id":1,"embed":{"id":8}},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13}]`)},
}
expected := `[{
expected := `{ "users" : [{
"id": 1,
"full_name": "Sidney Stroman",
"email": "user0@demo.com",
@ -247,7 +278,7 @@ func TestReplace(t *testing.T) {
"full_name": "Jerry Dickinson",
"email": "user1@demo.com",
"__twitter_id":"1234567890"
}]`
}] }`
err := Replace(&buf, []byte(input4), from, to)
if err != nil {
@ -255,6 +286,23 @@ func TestReplace(t *testing.T) {
}
if buf.String() != expected {
t.Log(buf.String())
t.Error("Does not match expected json")
}
}
// TestReplaceEmpty verifies that calling Replace with empty 'from' and 'to'
// field lists writes the input document to the buffer unchanged.
func TestReplaceEmpty(t *testing.T) {
	input := `{ "users" : [{"id":1,"full_name":"Sidney Stroman","email":"user0@demo.com","__users_twitter_id":"2048666903444506956"}, {"id":2,"full_name":"Jerry Dickinson","email":"user1@demo.com","__users_twitter_id":"2048666903444506956"}, {"id":3,"full_name":"Kenna Cassin","email":"user2@demo.com","__users_twitter_id":"2048666903444506956"}, {"id":4,"full_name":"Mr. Pat Parisian","email":"rodney@kautzer.biz","__users_twitter_id":"2048666903444506956"}, {"id":5,"full_name":"Bette Ebert","email":"janeenrath@goyette.com","__users_twitter_id":"2048666903444506956"}, {"id":6,"full_name":"Everett Kiehn","email":"michael@bartoletti.com","__users_twitter_id":"2048666903444506956"}, {"id":7,"full_name":"Katrina Cronin","email":"loretaklocko@framivolkman.org","__users_twitter_id":"2048666903444506956"}, {"id":8,"full_name":"Caroll Orn Sr.","email":"joannarau@hegmann.io","__users_twitter_id":"2048666903444506956"}, {"id":9,"full_name":"Gwendolyn Ziemann","email":"renaytoy@rutherford.co","__users_twitter_id":"2048666903444506956"}, {"id":10,"full_name":"Mrs. Rosann Fritsch","email":"holliemosciski@thiel.org","__users_twitter_id":"2048666903444506956"}, {"id":11,"full_name":"Arden Koss","email":"cristobalankunding@howewelch.org","__users_twitter_id":"2048666903444506956"}, {"id":12,"full_name":"Brenton Bauch PhD","email":"renee@miller.co","__users_twitter_id":"2048666903444506956"}, {"id":13,"full_name":"Daine Gleichner","email":"andrea@nienow.co","__users_twitter_id":"2048666903444506956"}] }`

	var out bytes.Buffer
	if err := Replace(&out, []byte(input), []Field{}, []Field{}); err != nil {
		t.Fatal(err)
	}

	// With nothing to replace, the output must be byte-identical to the input.
	if got := out.String(); got != input {
		t.Log(got)
		t.Error("Does not match expected json")
	}
}
@ -284,10 +332,11 @@ func BenchmarkFilter(b *testing.B) {
}
func BenchmarkStrip(b *testing.B) {
path := [][]byte{[]byte("data"), []byte("users")}
b.ReportAllocs()
for n := 0; n < b.N; n++ {
Strip([]byte(input3), []string{"data", "users"})
Strip([]byte(input3), path)
}
}

View File

@ -1,9 +1,10 @@
package json
package ajson
import (
"bytes"
"crypto/sha1"
"errors"
"github.com/cespare/xxhash/v2"
)
func Replace(w *bytes.Buffer, b []byte, from, to []Field) error {
@ -11,31 +12,25 @@ func Replace(w *bytes.Buffer, b []byte, from, to []Field) error {
return errors.New("'from' and 'to' must be of the same length")
}
fmap := make(map[[20]byte]int, (len(from) * 2))
tmap := make(map[[20]byte]int, (len(from)))
h := xxhash.New()
tmap := make(map[uint64]int, len(from))
for i, f := range from {
h1 := sha1.Sum(f.Key)
n, ok := fmap[h1]
if !ok {
fmap[h1] = i
n = i
}
h.Write(f.Key)
h.Write(f.Value)
h2 := sha1.Sum(f.Value)
fmap[h2] = n
tmap[h2] = i
tmap[h.Sum64()] = i
h.Reset()
}
state := expectKey
ws, we := 0, len(b)
s, e, d := 0, 0, 0
fi := -1
state := expectKey
ws, we := -1, len(b)
for i := 0; i < len(b); i++ {
// skip any left padding whitespace
if ws == 0 && (b[i] == '{' || b[i] == '[') {
if ws == -1 && (b[i] == '{' || b[i] == '[') {
ws = i
}
@ -55,11 +50,8 @@ func Replace(w *bytes.Buffer, b []byte, from, to []Field) error {
case state == expectKeyClose && b[i] == '"':
state = expectColon
h1 := sha1.Sum(b[(s + 1):i])
if n, ok := fmap[h1]; ok {
we = s
fi = n
}
h.Write(b[(s + 1):i])
we = s
case state == expectColon && b[i] == ':':
state = expectValue
@ -109,43 +101,58 @@ func Replace(w *bytes.Buffer, b []byte, from, to []Field) error {
if e != 0 {
e++
h2 := sha1.Sum(b[s:e])
replace := false
h.Write(b[s:e])
n, ok := tmap[h.Sum64()]
h.Reset()
if n, ok1 := fmap[h2]; ok1 && n == fi {
ti, ok2 := tmap[h2]
if ok {
if _, err := w.Write(b[ws:(we + 1)]); err != nil {
return err
}
if ok2 {
if _, err := w.Write(b[ws:(we + 1)]); err != nil {
return err
}
if _, err := w.Write(to[ti].Key); err != nil {
if len(to[n].Key) != 0 {
var err error
if _, err := w.Write(to[n].Key); err != nil {
return err
}
if _, err := w.WriteString(`":`); err != nil {
return err
}
if _, err := w.Write(to[ti].Value); err != nil {
if len(to[n].Value) != 0 {
_, err = w.Write(to[n].Value)
} else {
_, err = w.WriteString("null")
}
if err != nil {
return err
}
replace = true
ws = e
} else if b[e] == ',' {
ws = e + 1
} else {
ws = e
}
}
if !replace && (b[s] == '[' || b[s] == '{') {
if !ok && (b[s] == '[' || b[s] == '{') {
// the i++ in the for loop will add 1 so we account for that (s - 1)
i = s - 1
}
state = expectKey
we = len(b)
fi = -1
e = 0
d = 0
}
}
w.Write(b[ws:we])
if ws == -1 || (ws == 0 && we == len(b)) {
w.Write(b)
} else {
w.Write(b[ws:we])
}
return nil
}

View File

@ -1,21 +1,16 @@
package json
package ajson
import (
"bytes"
)
func Strip(b []byte, path []string) []byte {
s := 0
state := expectKey
kb := make([][]byte, 0, len(path))
for _, k := range path {
kb = append(kb, []byte(k))
}
func Strip(b []byte, path [][]byte) []byte {
s, e, d := 0, 0, 0
ki := 0
ob := b
pi := 0
pm := false
state := expectKey
for i := 0; i < len(b); i++ {
if state == expectObjClose || state == expectListClose {
@ -34,12 +29,12 @@ func Strip(b []byte, path []string) []byte {
case state == expectKeyClose && b[i] == '"':
state = expectColon
if ki == len(kb) {
ki = 0
if pi == len(path) {
pi = 0
}
pm = bytes.Equal(b[(s+1):i], kb[ki])
pm = bytes.Equal(b[(s+1):i], path[pi])
if pm {
ki++
pi++
}
case state == expectColon && b[i] == ':':
@ -92,7 +87,7 @@ func Strip(b []byte, path []string) []byte {
b = b[s:(e + 1)]
i = 0
if ki == len(kb) {
if pi == len(path) {
return b
}
}
@ -102,5 +97,5 @@ func Strip(b []byte, path []string) []byte {
}
}
return nil
return ob
}