New low-allocation, fast JSON parsing and editing library

This commit is contained in:
Vikram Rangnekar 2019-05-08 19:03:18 -04:00
parent 7c37a1ef63
commit b405dafb89
5 changed files with 896 additions and 0 deletions

163
json/filter.go Normal file
View File

@ -0,0 +1,163 @@
package json
import (
"bytes"
"crypto/sha1"
)
// Filter writes to w a reduced copy of the JSON document in b that keeps only
// the fields whose key appears in keys.
//
// The root of b may be an object or an array of objects. Keys are compared
// against the fields of those root-level objects only: a selected field is
// copied together with its whole value (nested objects and arrays included),
// and a rejected field is skipped together with its whole value. Newline and
// tab bytes inside a copied field are dropped.
//
// The scanner does not validate b. It has no handling for backslash-escaped
// quotes or for null values, so documents containing them may be filtered
// incorrectly.
//
// Filter returns the first error reported by a write to w.
func Filter(w *bytes.Buffer, b []byte, keys []string) error {
	s := 0
	state := expectKey
	var err error

	// Wanted keys are stored by SHA-1 digest so that a key read from b can be
	// looked up without first converting it to a string.
	kmap := make(map[[20]byte]struct{}, len(keys))
	for _, k := range keys {
		kmap[sha1.Sum([]byte(k))] = struct{}{}
	}

	isList := false // an opening '[' has been seen and written
	item := 0       // number of objects opened so far
	field := 0      // number of fields written for the current object

	for i := 0; i < len(b); i++ {
		switch {
		case state == expectKey:
			switch b[i] {
			case '[':
				if !isList {
					err = w.WriteByte('[')
				}
				isList = true
			case '{':
				if item == 0 {
					err = w.WriteByte('{')
				} else {
					// Close the previous object before opening the next one.
					_, err = w.WriteString("},{")
				}
				item++
				field = 0
			case '"':
				// s marks the opening quote. The next byte is examined on
				// the following iteration so that an empty key ("") is
				// closed correctly instead of being skipped over.
				state = expectKeyClose
				s = i
			}
		case state == expectKeyClose && b[i] == '"':
			state = expectColon
			i++
		}

		if err != nil {
			return err
		}
		if state != expectColon {
			continue
		}

		// i is now one past the closing quote, so the key is the text
		// strictly between the two quotes.
		k := b[(s + 1):(i - 1)]
		_, kf := kmap[sha1.Sum(k)]

		e := 0 // index of the last byte of the value, 0 while unknown
		d := 0 // bracket depth inside an object or array value
		for ; i < len(b); i++ {
			if state == expectObjClose || state == expectListClose {
				switch b[i] {
				case '{', '[':
					d++
				case '}', ']':
					d--
				}
			}

			switch {
			case state == expectColon && b[i] == ':':
				state = expectValue
			case state == expectValue && b[i] == '"':
				state = expectString
			case state == expectString && b[i] == '"':
				e = i
			case state == expectValue && b[i] == '[':
				state = expectListClose
				d++
			case state == expectListClose && d == 0 && b[i] == ']':
				e = i
			case state == expectValue && b[i] == '{':
				state = expectObjClose
				d++
			case state == expectObjClose && d == 0 && b[i] == '}':
				e = i
			case state == expectValue && (b[i] >= '0' && b[i] <= '9'):
				state = expectNumClose
			case state == expectNumClose &&
				((b[i] < '0' || b[i] > '9') &&
					(b[i] != '.' && b[i] != 'e' && b[i] != 'E' && b[i] != '+' && b[i] != '-')):
				// The number ended on the previous byte; step back so the
				// terminating byte is seen again by the outer loop.
				i--
				e = i
			case state == expectValue &&
				(b[i] == 'f' || b[i] == 'F' || b[i] == 't' || b[i] == 'T'):
				state = expectBoolClose
			case state == expectBoolClose && (b[i] == 'e' || b[i] == 'E'):
				e = i
			}

			if e == 0 {
				continue
			}

			if kf {
				if field != 0 {
					if err := w.WriteByte(','); err != nil {
						return err
					}
				}

				// Copy `"key": value`, leaving out newline and tab bytes.
				cb := b[s:(e + 1)]
				sk := 0
				for j := 0; j < len(cb); j++ {
					if cb[j] == '\n' || cb[j] == '\t' {
						if _, err := w.Write(cb[sk:j]); err != nil {
							return err
						}
						sk = j + 1
					}
				}
				if _, err := w.Write(cb[sk:]); err != nil {
					return err
				}
				field++
			}

			state = expectKey
			break
		}
	}

	if item != 0 {
		if err := w.WriteByte('}'); err != nil {
			return err
		}
	}
	if isList {
		if err := w.WriteByte(']'); err != nil {
			return err
		}
	}
	return nil
}

139
json/get.go Normal file
View File

@ -0,0 +1,139 @@
package json
import (
"crypto/sha1"
)
// Scanner states shared by the functions of this package. Each name says what
// the scanner is waiting for next.
const (
// expectKey: looking for the opening quote of a key.
expectKey int = iota
// expectKeyClose: inside a key, looking for its closing quote.
expectKeyClose
// expectColon: the key is closed, looking for the ':' that follows it.
expectColon
// expectValue: the colon was seen, looking for the first byte of the value.
expectValue
// expectString: inside a string value, looking for its closing quote.
expectString
// expectListClose: inside an array value, looking for the matching ']'.
expectListClose
// expectObjClose: inside an object value, looking for the matching '}'.
expectObjClose
// expectBoolClose: inside a true/false literal, looking for an 'e' or 'E'.
expectBoolClose
// expectNumClose: inside a number, looking for the first byte that cannot be
// part of it.
expectNumClose
)
// Field is a key/value pair of a JSON object as raw bytes.
type Field struct {
// Key is the field name without its surrounding quotes.
Key []byte
// Value is the raw JSON text of the value; string values keep their quotes.
Value []byte
}
// Get scans the JSON document in b and returns, in document order, every
// field whose key is listed in keys.
//
// After an object or array value has been recorded, the scan resumes inside
// it, so matching fields nested in that value are reported as well. The Key
// and Value of each returned Field are sub-slices of b; they are not copied.
//
// When nothing matches, the result is an empty slice. The scanner does not
// validate b and has no handling for backslash-escaped quotes or null values.
func Get(b []byte, keys [][]byte) []Field {
	s := 0
	state := expectKey

	// Wanted keys are stored by SHA-1 digest so that a key slice taken from b
	// can be looked up directly.
	kmap := make(map[[20]byte]struct{}, len(keys))
	for _, k := range keys {
		kmap[sha1.Sum(k)] = struct{}{}
	}

	// Start empty with room for a few matches. append grows the slice and
	// keeps the matches already collected.
	res := make([]Field, 0, 10)

	for i := 0; i < len(b); i++ {
		switch {
		case state == expectKey && b[i] == '"':
			state = expectKeyClose
			s = i + 1
			continue
		case state == expectKeyClose && b[i] == '"':
			state = expectColon
		}
		if state != expectColon {
			continue
		}

		k := b[s:i]
		_, kf := kmap[sha1.Sum(k)]

		e := 0 // index of the last byte of the value, 0 while unknown
		d := 0 // bracket depth inside an object or array value
		for ; i < len(b); i++ {
			if state == expectObjClose || state == expectListClose {
				switch b[i] {
				case '{', '[':
					d++
				case '}', ']':
					d--
				}
			}

			switch {
			case state == expectColon && b[i] == ':':
				state = expectValue
			case state == expectValue && b[i] == '"':
				state = expectString
				s = i
			case state == expectString && b[i] == '"':
				e = i
			case state == expectValue && b[i] == '[':
				state = expectListClose
				s = i
				d++
			case state == expectListClose && d == 0 && b[i] == ']':
				e = i
				// Rewind to the start of the array so its contents are
				// scanned for nested keys next.
				i = s
			case state == expectValue && b[i] == '{':
				state = expectObjClose
				s = i
				d++
			case state == expectObjClose && d == 0 && b[i] == '}':
				e = i
				// Rewind to the start of the object so its contents are
				// scanned for nested keys next.
				i = s
			case state == expectValue && (b[i] >= '0' && b[i] <= '9'):
				state = expectNumClose
				s = i
			case state == expectNumClose &&
				((b[i] < '0' || b[i] > '9') &&
					(b[i] != '.' && b[i] != 'e' && b[i] != 'E' && b[i] != '+' && b[i] != '-')):
				// The number ended on the previous byte.
				i--
				e = i
			case state == expectValue &&
				(b[i] == 'f' || b[i] == 'F' || b[i] == 't' || b[i] == 'T'):
				state = expectBoolClose
				s = i
			case state == expectBoolClose && (b[i] == 'e' || b[i] == 'E'):
				e = i
			}

			if e == 0 {
				continue
			}

			e++ // make e an exclusive end index
			if kf {
				res = append(res, Field{Key: k, Value: b[s:e]})
			}
			state = expectKey
			break
		}
	}
	return res
}

317
json/json_test.go Normal file
View File

@ -0,0 +1,317 @@
package json
import (
"bytes"
"testing"
)
// JSON fixtures shared by the tests and benchmarks below.
var (
// input1 nests a "users" array under "data". Its "__twitter_id" fields cover
// string, array, object, number, boolean and empty-string values; 1.23E is a
// number that ends in a bare exponent marker.
input1 = `
{
"data": {
"test": { "__twitter_id": "ABCD" },
"users": [
{
"id": 1,
"full_name": "Sidney Stroman",
"email": "user0@demo.com",
"__twitter_id": "2048666903444506956",
"embed": {
"id": 8,
"full_name": "Caroll Orn Sr.",
"email": "joannarau@hegmann.io",
"__twitter_id": "ABC123"
}
},
{
"id": 2,
"full_name": "Jerry Dickinson",
"email": "user1@demo.com",
"__twitter_id": [{ "name": "hello" }, { "name": "world"}]
},
{
"id": 3,
"full_name": "Kenna Cassin",
"email": "user2@demo.com",
"__twitter_id": { "name": "hello", "address": { "work": "1 infinity loop" } }
},
{
"id": 4,
"full_name": "Mr. Pat Parisian",
"email": "__twitter_id",
"__twitter_id": 1234567890
},
{
"id": 5,
"full_name": "Bette Ebert",
"email": "janeenrath@goyette.com",
"__twitter_id": 1.23E
},
{
"id": 6,
"full_name": "Everett Kiehn",
"email": "michael@bartoletti.com",
"__twitter_id": true
},
{
"id": 7,
"full_name": "Katrina Cronin",
"email": "loretaklocko@framivolkman.org",
"__twitter_id": false
},
{
"id": 8,
"full_name": "Caroll Orn Sr.",
"email": "joannarau@hegmann.io",
"__twitter_id": "2048666903444506956"
},
{
"id": 9,
"full_name": "Gwendolyn Ziemann",
"email": "renaytoy@rutherford.co",
"__twitter_id": ["hello", "world"]
},
{
"id": 10,
"full_name": "Mrs. Rosann Fritsch",
"email": "holliemosciski@thiel.org",
"__twitter_id": "2048666903444506956"
},
{
"id": 11,
"full_name": "Arden Koss",
"email": "cristobalankunding@howewelch.org",
"__twitter_id": "2048666903444506956"
},
{
"id": 12,
"full_name": "Brenton Bauch PhD",
"email": "renee@miller.co",
"__twitter_id": 1
},
{
"id": 13,
"full_name": "Daine Gleichner",
"email": "andrea@gmail.com",
"__twitter_id": "",
"id__twitter_id": "NOOO",
"work_email": "andrea@nienow.co"
}
]}
}`
// input2 is a root-level array of two objects; the first one carries an
// embedded object.
input2 = `
[{
"id": 1,
"full_name": "Sidney Stroman",
"email": "user0@demo.com",
"__twitter_id": "2048666903444506956",
"embed": {
"id": 8,
"full_name": "Caroll Orn Sr.",
"email": "joannarau@hegmann.io",
"__twitter_id": "ABC123"
}
},
{
"m": 1,
"id": 2,
"full_name": "Jerry Dickinson",
"email": "user1@demo.com",
"__twitter_id": [{ "name": "hello" }, { "name": "world"}]
}]`
// input3 holds a compact "users" array under "data".
input3 = `
{
"data": {
"test": { "__twitter_id": "ABCD" },
"users": [{"id":1,"embed":{"id":8}},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13}]
}
}`
// input4 appears to have the same content as input2.
input4 = `
[{
"id": 1,
"full_name": "Sidney Stroman",
"email": "user0@demo.com",
"__twitter_id": "2048666903444506956",
"embed": {
"id": 8,
"full_name": "Caroll Orn Sr.",
"email": "joannarau@hegmann.io",
"__twitter_id": "ABC123"
}
},
{
"m": 1,
"id": 2,
"full_name": "Jerry Dickinson",
"email": "user1@demo.com",
"__twitter_id": [{ "name": "hello" }, { "name": "world"}]
}]`
)
// TestGet looks up two keys in input1 and compares every returned field, in
// order, with the expected key and raw value.
func TestGet(t *testing.T) {
	keys := [][]byte{
		[]byte("__twitter_id"),
		[]byte("work_email"),
	}
	values := Get([]byte(input1), keys)

	expected := []struct{ key, value string }{
		{"__twitter_id", `"ABCD"`},
		{"__twitter_id", `"2048666903444506956"`},
		{"__twitter_id", `"ABC123"`},
		{"__twitter_id", `[{ "name": "hello" }, { "name": "world"}]`},
		{"__twitter_id", `{ "name": "hello", "address": { "work": "1 infinity loop" } }`},
		{"__twitter_id", `1234567890`},
		{"__twitter_id", `1.23E`},
		{"__twitter_id", `true`},
		{"__twitter_id", `false`},
		{"__twitter_id", `"2048666903444506956"`},
		{"__twitter_id", `["hello", "world"]`},
		{"__twitter_id", `"2048666903444506956"`},
		{"__twitter_id", `"2048666903444506956"`},
		{"__twitter_id", `1`},
		{"__twitter_id", `""`},
		{"work_email", `"andrea@nienow.co"`},
	}

	if len(values) != len(expected) {
		t.Fatal("len(values) != len(expected)")
	}

	for i, want := range expected {
		gotKey := string(values[i].Key)
		if gotKey != want.key {
			t.Error(gotKey, " != ", want.key)
		}
		gotValue := string(values[i].Value)
		if gotValue != want.value {
			t.Error(gotValue, " != ", want.value)
		}
	}
}
// TestFilter keeps three keys of every object in input2 and compares the
// compacted output with the expected document.
func TestFilter(t *testing.T) {
	var b bytes.Buffer

	// A failed write must fail the test rather than surface later as a
	// confusing mismatch.
	if err := Filter(&b, []byte(input2), []string{"id", "full_name", "embed"}); err != nil {
		t.Fatal(err)
	}

	expected := `[{"id": 1,"full_name": "Sidney Stroman","embed": {"id": 8,"full_name": "Caroll Orn Sr.","email": "joannarau@hegmann.io","__twitter_id": "ABC123"}},{"id": 2,"full_name": "Jerry Dickinson"}]`
	if b.String() != expected {
		t.Log(b.String())
		t.Error("Does not match expected json")
	}
}
// TestStrip follows the path data -> users in input3 and expects the raw
// "users" array back.
func TestStrip(t *testing.T) {
	value := Strip([]byte(input3), []string{"data", "users"})
	expected := []byte(`[{"id":1,"embed":{"id":8}},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13}]`)

	if !bytes.Equal(value, expected) {
		// Log the result as text; a raw []byte prints as a list of numbers.
		t.Log(string(value))
		t.Error("Does not match expected json")
	}
}
// TestReplace swaps two "__twitter_id" fields of input4 for new key/value
// pairs and compares the rewritten document with the expected text.
func TestReplace(t *testing.T) {
	from := []Field{
		{Key: []byte("__twitter_id"), Value: []byte(`[{ "name": "hello" }, { "name": "world"}]`)},
		{Key: []byte("__twitter_id"), Value: []byte(`"ABC123"`)},
	}
	to := []Field{
		{Key: []byte("__twitter_id"), Value: []byte(`"1234567890"`)},
		{Key: []byte("some_list"), Value: []byte(`[{"id":1,"embed":{"id":8}},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13}]`)},
	}

	expected := `[{
"id": 1,
"full_name": "Sidney Stroman",
"email": "user0@demo.com",
"__twitter_id": "2048666903444506956",
"embed": {
"id": 8,
"full_name": "Caroll Orn Sr.",
"email": "joannarau@hegmann.io",
"some_list":[{"id":1,"embed":{"id":8}},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13}]
}
},
{
"m": 1,
"id": 2,
"full_name": "Jerry Dickinson",
"email": "user1@demo.com",
"__twitter_id":"1234567890"
}]`

	var buf bytes.Buffer
	if err := Replace(&buf, []byte(input4), from, to); err != nil {
		t.Fatal(err)
	}

	if got := buf.String(); got != expected {
		t.Error("Does not match expected json")
	}
}
func BenchmarkGet(b *testing.B) {
b.ReportAllocs()
for n := 0; n < b.N; n++ {
Get([]byte(input1), [][]byte{[]byte("__twitter_id")})
}
}
func BenchmarkFilter(b *testing.B) {
var buf bytes.Buffer
keys := []string{"id", "full_name", "embed", "email", "__twitter_id"}
b.ResetTimer()
b.ReportAllocs()
for n := 0; n < b.N; n++ {
err := Filter(&buf, []byte(input2), keys)
if err != nil {
b.Fatal(err)
}
buf.Reset()
}
}
func BenchmarkStrip(b *testing.B) {
b.ReportAllocs()
for n := 0; n < b.N; n++ {
Strip([]byte(input3), []string{"data", "users"})
}
}
func BenchmarkReplace(b *testing.B) {
var buf bytes.Buffer
from := []Field{
{[]byte("__twitter_id"), []byte(`[{ "name": "hello" }, { "name": "world"}]`)},
{[]byte("__twitter_id"), []byte(`"ABC123"`)},
}
to := []Field{
{[]byte("__twitter_id"), []byte(`"1234567890"`)},
{[]byte("some_list"), []byte(`[{"id":1,"embed":{"id":8}},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13}]`)},
}
b.ResetTimer()
b.ReportAllocs()
for n := 0; n < b.N; n++ {
err := Replace(&buf, []byte(input4), from, to)
if err != nil {
b.Fatal(err)
}
buf.Reset()
}
}

159
json/replace.go Normal file
View File

@ -0,0 +1,159 @@
package json
import (
"bytes"
"crypto/sha1"
"errors"
)
// Replace copies the JSON document in b to w, rewriting every field that
// matches an entry of from with the entry of to at the same index.
//
// A field matches from[i] when its key equals from[i].Key and the raw text of
// its value equals from[i].Value; both comparisons are made on SHA-1 digests.
// A matched field is written as to[i].Key, a colon and to[i].Value with no
// whitespace around the colon. All other bytes are copied unchanged, except
// that bytes in front of the first '{' or '[' are dropped. Unmatched object
// and array values are scanned again so that fields nested inside them can be
// replaced too.
//
// Replace returns an error when from and to differ in length, or when a write
// to w fails. The scanner does not validate b and has no handling for
// backslash-escaped quotes or null values.
func Replace(w *bytes.Buffer, b []byte, from, to []Field) error {
	if len(from) != len(to) {
		return errors.New("'from' and 'to' must be of the same length")
	}

	state := expectKey
	ws, we := 0, len(b) // bounds of the pending, not yet written, part of b
	started := false    // the first '{' or '[' has been found
	s := 0
	fi := -1 // index in from selected by the current key, -1 for none

	// fmap maps the digest of every key and every value in from to the index
	// of the first entry that uses that key. tmap maps the digest of every
	// value to the index of its own entry.
	fmap := make(map[[20]byte]int, (len(from) * 2))
	tmap := make(map[[20]byte]int, (len(from)))

	for i, f := range from {
		h1 := sha1.Sum(f.Key)
		n, ok := fmap[h1]
		if !ok {
			fmap[h1] = i
			n = i
		}

		h2 := sha1.Sum(f.Value)
		fmap[h2] = n
		tmap[h2] = i
	}

	for i := 0; i < len(b); i++ {
		switch {
		case !started && (b[i] == '{' || b[i] == '['):
			// Output begins at the first bracket. A dedicated flag is used
			// because offset 0 is a valid start position, and later brackets
			// must never move the start of the pending output.
			started = true
			ws = i
		case state == expectKey && b[i] == '"':
			state = expectKeyClose
			s = i + 1
			continue
		case state == expectKeyClose && b[i] == '"':
			state = expectColon
		default:
			continue
		}
		if state != expectColon {
			continue
		}

		if n, ok := fmap[sha1.Sum(b[s:i])]; ok {
			// Candidate key: if its value matches as well, the pending
			// output ends right before the key text.
			we = s
			fi = n
		}

		e := 0 // index of the last byte of the value, 0 while unknown
		d := 0 // bracket depth inside an object or array value
		for ; i < len(b); i++ {
			if state == expectObjClose || state == expectListClose {
				switch b[i] {
				case '{', '[':
					d++
				case '}', ']':
					d--
				}
			}

			switch {
			case state == expectColon && b[i] == ':':
				state = expectValue
			case state == expectValue && b[i] == '"':
				state = expectString
				s = i
			case state == expectString && b[i] == '"':
				e = i
			case state == expectValue && b[i] == '[':
				state = expectListClose
				s = i
				d++
			case state == expectListClose && d == 0 && b[i] == ']':
				e = i
			case state == expectValue && b[i] == '{':
				state = expectObjClose
				s = i
				d++
			case state == expectObjClose && d == 0 && b[i] == '}':
				e = i
			case state == expectValue && (b[i] >= '0' && b[i] <= '9'):
				state = expectNumClose
				s = i
			case state == expectNumClose &&
				((b[i] < '0' || b[i] > '9') &&
					(b[i] != '.' && b[i] != 'e' && b[i] != 'E' && b[i] != '+' && b[i] != '-')):
				// The number ended on the previous byte.
				i--
				e = i
			case state == expectValue &&
				(b[i] == 'f' || b[i] == 'F' || b[i] == 't' || b[i] == 'T'):
				state = expectBoolClose
				s = i
			case state == expectBoolClose && (b[i] == 'e' || b[i] == 'E'):
				e = i
			}

			if e == 0 {
				continue
			}

			e++ // make e an exclusive end index
			h2 := sha1.Sum(b[s:e])
			if n, ok := fmap[h2]; ok && n == fi {
				if ti, ok := tmap[h2]; ok {
					// Flush everything up to the key, then the replacement.
					if _, err := w.Write(b[ws:we]); err != nil {
						return err
					}
					if _, err := w.Write(to[ti].Key); err != nil {
						return err
					}
					if _, err := w.WriteString(`":`); err != nil {
						return err
					}
					if _, err := w.Write(to[ti].Value); err != nil {
						return err
					}
					ws = e
				}
			}

			if ws != e && (b[s] == '[' || b[s] == '{') {
				// Not replaced: step back so the outer loop scans the
				// contents of this object or array for nested fields.
				i = s - 1
			}

			state = expectKey
			we = len(b)
			fi = -1
			break
		}
	}

	_, err := w.Write(b[ws:we])
	return err
}

118
json/strip.go Normal file
View File

@ -0,0 +1,118 @@
package json
import (
"bytes"
)
// Strip follows path through the JSON document in b and returns the raw text
// of the object or array stored under the last path element. The result is a
// sub-slice of b.
//
// For each path element the document is scanned forward for the next quoted
// string equal to that element; the value that follows it becomes the
// document searched for the next element.
// NOTE(review): because every quoted string met during the scan is compared,
// the search is not limited to the direct children of the previous match.
//
// Strip returns nil when path is empty, when an element is not found, or when
// a matched key holds a string, number or boolean instead of an object or
// array. The scanner does not validate b and has no handling for
// backslash-escaped quotes or null values.
func Strip(b []byte, path []string) []byte {
	s := 0
	state := expectKey

	kb := make([][]byte, 0, len(path))
	for _, k := range path {
		kb = append(kb, []byte(k))
	}
	ki := 0 // index of the path element being searched for

	for i := 0; i < len(b); i++ {
		switch {
		case state == expectKey && b[i] == '"':
			state = expectKeyClose
			s = i + 1
			continue
		case state == expectKeyClose && b[i] == '"':
			state = expectColon
		}
		if state != expectColon {
			continue
		}

		if ki >= len(kb) {
			return nil
		}
		if !bytes.Equal(b[s:i], kb[ki]) {
			state = expectKey
			continue
		}
		ki++

		e := 0  // index of the last byte of the value, 0 while unknown
		d := 0  // bracket depth inside an object or array value
		vs := 0 // index of the first byte of the value
		for ; i < len(b); i++ {
			if state == expectObjClose || state == expectListClose {
				switch b[i] {
				case '{', '[':
					d++
				case '}', ']':
					d--
				}
			}

			switch {
			case state == expectColon && b[i] == ':':
				state = expectValue
			case state == expectValue && b[i] == '"':
				state = expectString
				vs = i
			case state == expectString && b[i] == '"':
				e = i
			case state == expectValue && b[i] == '[':
				state = expectListClose
				vs = i
				d++
			case state == expectListClose && d == 0 && b[i] == ']':
				e = i
			case state == expectValue && b[i] == '{':
				state = expectObjClose
				vs = i
				d++
			case state == expectObjClose && d == 0 && b[i] == '}':
				e = i
			case state == expectValue && (b[i] >= '0' && b[i] <= '9'):
				state = expectNumClose
				vs = i
			case state == expectNumClose &&
				((b[i] < '0' || b[i] > '9') &&
					(b[i] != '.' && b[i] != 'e' && b[i] != 'E' && b[i] != '+' && b[i] != '-')):
				// The number ended on the previous byte.
				i--
				e = i
			case state == expectValue &&
				(b[i] == 'f' || b[i] == 'F' || b[i] == 't' || b[i] == 'T'):
				state = expectBoolClose
				vs = i
			case state == expectBoolClose && (b[i] == 'e' || b[i] == 'E'):
				e = i
			}

			if e == 0 {
				continue
			}

			// Only objects and arrays can be returned or descended into.
			// Stop here for any other value; for a number, carrying on would
			// step back and re-read the same byte without end.
			if b[vs] != '[' && b[vs] != '{' {
				return nil
			}

			// Narrow the document to the matched value and search it for
			// the next path element.
			b = b[vs:(e + 1)]
			i = 0
			if ki == len(kb) {
				return b
			}
			state = expectKey
			break
		}
	}
	return nil
}