faster xml parser

This commit is contained in:
Robert Janetzko 2022-04-19 18:32:20 +00:00
parent d29ab0dce1
commit 767216b2cc
5 changed files with 11921 additions and 13536 deletions

View file

@ -25,8 +25,7 @@ var backendTemplate = template.Must(template.New("").Funcs(template.FuncMap{
package model
import (
"encoding/xml"
"strconv"
"github.com/robertjanetzko/LegendsBrowser2/backend/util"
"fmt"
"encoding/json"
)
@ -140,19 +139,13 @@ func (x *{{ $obj.Name }}) CheckFields() {
// Parser
func n(d []byte) int {
v, _ := strconv.Atoi(string(d))
return v
}
{{- range $name, $obj := $.Objects }}
{{- range $plus := $.Modes }}
func parse{{ $obj.Name }}{{ if $plus }}Plus{{ end }}(d *xml.Decoder, start *xml.StartElement{{ if $plus }}, obj *{{ $obj.Name }}{{ end }}) (*{{ $obj.Name }}, error) {
func parse{{ $obj.Name }}{{ if $plus }}Plus{{ end }}(p *util.XMLParser{{ if $plus }}, obj *{{ $obj.Name }}{{ end }}) (*{{ $obj.Name }}, error) {
var (
{{- if not $plus }}
obj = &{{ $obj.Name }}{}
{{- end }}
data []byte
)
{{- if $plus }}
if obj == nil {
@ -165,75 +158,63 @@ func parse{{ $obj.Name }}{{ if $plus }}Plus{{ end }}(d *xml.Decoder, start *xml.
{{- end }}
for {
tok, err := d.Token()
t, n, err := p.Token()
if err != nil {
return nil, err
}
switch t := tok.(type) {
case xml.StartElement:
switch t.Name.Local {
switch t {
case util.StartElement:
switch n {
{{- range $fname, $field := $obj.Fields }}
{{- if $field.Active $plus }}
case "{{ $fname }}":
{{ $field.StartAction $plus }}
{{- end }}
{{- end }}
{{- if and (eq $fname "type") (not (not $obj.SubTypes)) }}
data, err := p.Value()
if err != nil {
return nil, err
}
switch data {
{{- range $sub := ($obj.ActiveSubTypes $plus) }}
case "{{ $sub.Case }}":
{{- if eq 1 (len $sub.Options) }}
{{- if not $plus }}
obj.Details, err = parse{{ $sub.Name }}(p)
{{- else }}
obj.Details, err = parse{{ $sub.Name }}Plus(p, obj.Details.(*{{ $sub.Name }}))
{{- end }}
{{- else }}
switch details := obj.Details.(type) {
{{- range $opt := $sub.Options }}
case *{{ $opt}}:
obj.Details, err = parse{{ $opt }}Plus(p, details)
{{- end }}
default:
fmt.Println("unknown subtype option", obj.Details)
p.Skip()
}
{{- end }}
{{- end }}
default:
p.Skip()
}
if err != nil {
return nil, err
}
return obj, nil
{{- else }}
{{ $field.StartAction $obj $plus }}
{{- end }}
{{- end }}
{{- end }}
default:
// fmt.Println("unknown field", t.Name.Local)
d.Skip()
// fmt.Println("unknown field", n)
p.Skip()
}
case xml.CharData:
data = append(data, t...)
case xml.EndElement:
if t.Name.Local == start.Name.Local {
case util.EndElement:
obj.CheckFields()
return obj, nil
}
switch t.Name.Local {
{{- range $fname, $field := $obj.Fields }}{{- if $field.Active $plus }}
case "{{ $fname }}":
{{- if and (eq $fname "type") (not (not $obj.SubTypes)) }}
var err error
switch string(data) {
{{- range $sub := ($obj.ActiveSubTypes $plus) }}
case "{{ $sub.Case }}":
{{- if eq 1 (len $sub.Options) }}
{{- if not $plus }}
obj.Details, err = parse{{ $sub.Name }}(d, start)
{{- else }}
obj.Details, err = parse{{ $sub.Name }}Plus(d, start, obj.Details.(*{{ $sub.Name }}))
{{- end }}
{{- else }}
switch details := obj.Details.(type) {
{{- range $opt := $sub.Options }}
case *{{ $opt}}:
obj.Details, err = parse{{ $opt }}Plus(d, start, details)
{{- end }}
default:
fmt.Println("unknown subtype option", obj.Details)
d.Skip()
}
{{- end }}
{{- end }}
default:
d.Skip()
}
if err != nil {
return nil, err
}
return obj, nil
{{- else }}
{{ $field.EndAction $obj }}
{{- end }}
{{- end }}{{- end }}
default:
// fmt.Println("unknown field", t.Name.Local)
}
}
}
}
@ -322,7 +303,7 @@ func (f Field) Init(plus bool) string {
return ""
}
func (f Field) StartAction(plus bool) string {
func (f Field) StartAction(obj Object, plus bool) string {
n := f.Name
if n == "Id" || n == "Name" {
@ -332,9 +313,9 @@ func (f Field) StartAction(plus bool) string {
if f.Type == "object" {
var p string
if !plus {
p = fmt.Sprintf("v, _ := parse%s(d, &t)", *f.ElementType)
p = fmt.Sprintf("v, _ := parse%s(p)", *f.ElementType)
} else {
p = fmt.Sprintf("v, _ := parse%sPlus(d, &t, &%s{})", *f.ElementType, *f.ElementType)
p = fmt.Sprintf("v, _ := parse%sPlus(p, &%s{})", *f.ElementType, *f.ElementType)
}
if !f.Multiple {
return fmt.Sprintf("%s\nobj.%s = v", p, n)
@ -347,21 +328,50 @@ func (f Field) StartAction(plus bool) string {
gen := fmt.Sprintf("parse%s", *f.ElementType)
if f.Type == "array" {
return fmt.Sprintf("parseArray(d, &obj.%s, %s)", f.Name, gen)
return fmt.Sprintf("parseArray(p, &obj.%s, %s)", f.Name, gen)
}
if f.Type == "map" {
if !plus {
return fmt.Sprintf("parseMap(d, &obj.%s, %s)", f.Name, gen)
return fmt.Sprintf("parseMap(p, &obj.%s, %s)", f.Name, gen)
} else {
gen = fmt.Sprintf("parse%sPlus", *f.ElementType)
return fmt.Sprintf("parseMapPlus(d, &obj.%s, %s)", f.Name, gen)
return fmt.Sprintf("parseMapPlus(p, &obj.%s, %s)", f.Name, gen)
}
}
}
if f.Type == "int" || f.Type == "string" || f.Type == "bool" || f.Type == "enum" {
return "data = nil"
n := f.Name
if n == "Id" || n == "Name" {
n = n + "_"
} else {
n = f.CorrectedName(obj)
}
s := "data, err := p.Value()\nif err != nil { return nil, err }\n"
if !f.Multiple {
if f.Type == "int" {
return fmt.Sprintf("%sobj.%s = num(data)", s, n)
} else if f.Type == "string" {
return fmt.Sprintf("%sobj.%s = string(data)", s, n)
} else if f.Type == "bool" {
s := "_, err := p.Value()\nif err != nil { return nil, err }\n"
return fmt.Sprintf("%sobj.%s = true", s, n)
} else if f.Type == "enum" {
return fmt.Sprintf("%sobj.%s = parse%s%s(string(data))", s, n, obj.Name, n)
}
} else {
if f.Type == "int" {
return fmt.Sprintf("%sobj.%s = append(obj.%s, num(data))", s, n, n)
} else if f.Type == "string" {
return fmt.Sprintf("%sobj.%s = append(obj.%s, string(data))", s, n, n)
} else if f.Type == "enum" {
return fmt.Sprintf("%sobj.%s = append(obj.%s, parse%s%s(string(data)))", s, n, n, obj.Name, n)
}
}
}
return ""

View file

@ -116,7 +116,7 @@ func main() {
t := templates.New(functions)
if len(*f) > 0 {
defer profile.Start(profile.MemProfile).Stop()
defer profile.Start(profile.ProfilePath(".")).Stop()
go func() {
http.ListenAndServe(":8081", nil)
}()

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,7 @@
package model
import (
"bufio"
"encoding/json"
"encoding/xml"
"fmt"
@ -39,10 +40,32 @@ func NewLegendsDecoder(file string) (*xml.Decoder, *os.File, *pb.ProgressBar, er
return d, xmlFile, bar, err
}
// NewLegendsParser opens the legends XML file at the given path and wraps it
// in a util.XMLParser.
//
// The file is read through a util.NewConvertReader, a progress-bar proxy
// reader sized to the file length, and a bufio.Reader, in that order.
//
// It returns the parser, the opened file, and the started progress bar; the
// caller receives the file and bar so it can close/finish them. If the file
// cannot be stat'ed or opened, the error is returned, all other results are
// nil, and no progress bar is started.
func NewLegendsParser(file string) (*util.XMLParser, *os.File, *pb.ProgressBar, error) {
	fi, err := os.Stat(file)
	if err != nil {
		return nil, nil, nil, err
	}

	xmlFile, err := os.Open(file)
	if err != nil {
		// Bail out before starting the bar or wrapping a nil *os.File in
		// readers; the caller handles (and reports) the error.
		return nil, nil, nil, err
	}

	// Start the bar only once there is something to read, so a failed open
	// does not leave a dangling progress bar behind.
	bar := pb.Full.Start64(fi.Size())
	fmt.Println("Successfully Opened", file)

	converter := util.NewConvertReader(xmlFile)
	barReader := bar.NewProxyReader(converter)
	p := util.NewXMLParser(bufio.NewReader(barReader))

	return p, xmlFile, bar, nil
}
func Parse(file string) (*DfWorld, error) {
InitSameFields()
d, xmlFile, bar, err := NewLegendsDecoder(file)
p, xmlFile, bar, err := NewLegendsParser(file)
if err != nil {
return nil, err
}
@ -50,14 +73,14 @@ func Parse(file string) (*DfWorld, error) {
BaseLoop:
for {
tok, err := d.Token()
t, n, err := p.Token()
if err != nil {
return nil, err
}
switch t := tok.(type) {
case xml.StartElement:
if t.Name.Local == "df_world" {
world, err = parseDfWorld(d, &t)
switch t {
case util.StartElement:
if n == "df_world" {
world, err = parseDfWorld(p)
if err != nil {
return nil, err
}
@ -73,7 +96,7 @@ BaseLoop:
if plus {
file = strings.Replace(file, "-legends.xml", "-legends_plus.xml", 1)
d, xmlFile, bar, err := NewLegendsDecoder(file)
p, xmlFile, bar, err = NewLegendsParser(file)
if err != nil {
return nil, err
}
@ -81,14 +104,14 @@ BaseLoop:
PlusLoop:
for {
tok, err := d.Token()
t, n, err := p.Token()
if err != nil {
return nil, err
}
switch t := tok.(type) {
case xml.StartElement:
if t.Name.Local == "df_world" {
world, err = parseDfWorldPlus(d, &t, world)
switch t {
case util.StartElement:
if n == "df_world" {
world, err = parseDfWorldPlus(p, world)
if err != nil {
return nil, err
}
@ -109,89 +132,89 @@ BaseLoop:
return world, nil
}
func parseArray[T any](d *xml.Decoder, dest *[]T, creator func(*xml.Decoder, *xml.StartElement) (T, error)) {
func parseArray[T any](p *util.XMLParser, dest *[]T, creator func(*util.XMLParser) (T, error)) {
for {
tok, err := d.Token()
t, _, err := p.Token()
if err != nil {
return // nil, err
}
switch t := tok.(type) {
case xml.StartElement:
x, _ := creator(d, &t)
switch t {
case util.StartElement:
x, _ := creator(p)
*dest = append(*dest, x)
case xml.EndElement:
case util.EndElement:
return
}
}
}
func parseMap[T Identifiable](d *xml.Decoder, dest *map[int]T, creator func(*xml.Decoder, *xml.StartElement) (T, error)) {
func parseMap[T Identifiable](p *util.XMLParser, dest *map[int]T, creator func(*util.XMLParser) (T, error)) {
for {
tok, err := d.Token()
t, _, err := p.Token()
if err != nil {
return // nil, err
}
switch t := tok.(type) {
case xml.StartElement:
x, _ := creator(d, &t)
switch t {
case util.StartElement:
x, _ := creator(p)
(*dest)[x.Id()] = x
case xml.EndElement:
case util.EndElement:
return
}
}
}
func parseMapPlus[T Identifiable](d *xml.Decoder, dest *map[int]T, creator func(*xml.Decoder, *xml.StartElement, T) (T, error)) {
func parseMapPlus[T Identifiable](p *util.XMLParser, dest *map[int]T, creator func(*util.XMLParser, T) (T, error)) {
for {
tok, err := d.Token()
t, _, err := p.Token()
if err != nil {
return
}
switch t := tok.(type) {
case xml.StartElement:
id, err := parseId(d)
switch t {
case util.StartElement:
id, err := parseId(p)
if err != nil {
log.Fatal(err)
}
x, err := creator(d, &t, (*dest)[id])
x, err := creator(p, (*dest)[id])
if err != nil {
return
}
(*dest)[id] = x
case xml.EndElement:
case util.EndElement:
return
}
}
}
func parseId(d *xml.Decoder) (int, error) {
var data []byte
func parseId(p *util.XMLParser) (int, error) {
for {
tok, err := d.Token()
t, n, err := p.Token()
if err != nil {
return -1, err
}
switch t := tok.(type) {
case xml.StartElement:
data = nil
if t.Name.Local != "id" {
d.Skip()
// return -1, fmt.Errorf("expected id at: %d", d.InputOffset())
}
case xml.CharData:
data = append(data, t...)
case xml.EndElement:
if t.Name.Local == "id" {
return strconv.Atoi(string(data))
switch t {
case util.StartElement:
if n == "id" {
s, err := p.Value()
if err != nil {
return -1, err
}
return strconv.Atoi(s)
} else {
p.Skip()
}
}
}
}
// num converts a decimal string to an int on a best-effort basis.
//
// The conversion error is deliberately discarded: the result is whatever
// strconv.Atoi returns alongside the error (0 for text that is not a number).
func num(s string) int {
	parsed, _ := strconv.Atoi(s) // error intentionally ignored, see doc comment
	return parsed
}
var sameFields map[string]map[string]map[string]bool
func exportSameFields() map[string]map[string]string {

164
backend/util/xml.go Normal file
View file

@ -0,0 +1,164 @@
package util
import (
"bufio"
)
// XMLParser is a minimal streaming tokenizer that reads bytes one at a time
// from a bufio.Reader and reports start and end element tokens.
type XMLParser struct {
	// reader is the byte source all methods consume from.
	reader *bufio.Reader
	// scratch is a reusable buffer that accumulates the bytes of the tag name
	// or text value currently being read.
	scratch *scratch
	// selfClose is set by Token after it has returned the StartElement of a
	// self-closing tag; the next Token or Value call clears it.
	selfClose bool
	// lastElement is the name of the most recent self-closing tag; Token
	// returns it with the EndElement it synthesizes while selfClose is set.
	lastElement string
}
// NewXMLParser builds an XMLParser that reads from r, starting with a 1024
// byte scratch buffer.
//
// Before returning it consumes input up to and including the first '>'
// (presumably the leading XML declaration). A read failure during that step
// is not reported to the caller.
func NewXMLParser(r *bufio.Reader) *XMLParser {
	buf := &scratch{data: make([]byte, 1024)}
	parser := &XMLParser{reader: r, scratch: buf}

	// The constructor has no error return, so the result is dropped here.
	_ = parser.skipDeclerations()

	return parser
}
// skipDeclerations discards input up to and including the first '>' byte.
// It returns the reader's error if the input ends or fails before a '>' is
// seen, and nil otherwise.
func (x *XMLParser) skipDeclerations() error {
	for {
		switch b, err := x.reader.ReadByte(); {
		case err != nil:
			return err
		case b == '>':
			return nil
		}
	}
}
// TokenType identifies the kind of token returned by XMLParser.Token.
type TokenType int

const (
	// StartElement marks an opening tag. It is the zero value, which is also
	// what Token returns next to a non-nil error, so check the error first.
	StartElement TokenType = iota
	// EndElement marks a closing tag, including the one synthesized after a
	// self-closing tag.
	EndElement
)
// Token advances to the next tag and reports it.
//
// It returns StartElement or EndElement together with the raw tag name, i.e.
// every byte between '<' (or "</") and the following '>'. Attributes,
// comments and CDATA sections are not interpreted. Bytes outside of tags,
// including a stray '>' in text, are discarded.
//
// A self-closing tag such as <name/> is reported as a StartElement named
// "name"; the matching EndElement is synthesized by the next Token call
// (unless Value consumes it first).
//
// Errors from the underlying reader are returned unchanged, with a zero
// TokenType and an empty name.
func (x *XMLParser) Token() (TokenType, string, error) {
	if x.selfClose {
		x.selfClose = false
		return EndElement, x.lastElement, nil
	}

	inTag := false
	for {
		b, err := x.reader.ReadByte()
		if err != nil {
			return 0, "", err
		}

		switch {
		case b == '<':
			// (Re)start collecting a tag; anything gathered so far is dropped.
			inTag = true
			x.scratch.reset()
		case !inTag:
			// Text between tags. Ignoring '>' here keeps stale scratch
			// contents from being reported as a tag.
		case b != '>':
			x.scratch.add(b)
		default:
			name := x.scratch.bytes()

			// A leading '/' marks a closing tag and is not part of the name.
			closing := len(name) > 0 && name[0] == '/'
			if closing {
				name = name[1:]
			}

			// A trailing '/' marks a self-closing tag. The length check keeps
			// empty tags such as "<>" or "</>" from indexing out of range.
			if n := len(name); n > 0 && name[n-1] == '/' {
				x.selfClose = true
				x.lastElement = string(name[:n-1])
				return StartElement, x.lastElement, nil
			}

			if closing {
				return EndElement, string(name), nil
			}
			return StartElement, string(name), nil
		}
	}
}
// Value returns the raw text that follows the current position, up to the
// next '<', and then consumes that following tag through its '>'.
//
// It is meant to be called right after Token returned a StartElement for a
// text-only element, so that the consumed tag is that element's closing tag
// (the tag itself is not checked). For a pending self-closing element it
// returns "" without reading and drops the synthesized EndElement.
//
// NOTE(review): the bytes are returned as-is; entity references such as
// "&amp;" are not decoded — confirm the input never relies on them.
func (x *XMLParser) Value() (string, error) {
	if x.selfClose {
		x.selfClose = false
		return "", nil
	}

	x.scratch.reset()

	// Phase 1: collect the text up to (not including) the next '<'.
	for {
		b, err := x.reader.ReadByte()
		if err != nil {
			return "", err
		}
		if b == '<' {
			break
		}
		x.scratch.add(b)
	}

	// Phase 2: discard the tag that follows, through its terminating '>'.
	for {
		b, err := x.reader.ReadByte()
		if err != nil {
			return "", err
		}
		if b == '>' {
			return string(x.scratch.bytes()), nil
		}
	}
}
// Skip consumes tokens until it has read the EndElement that closes the
// element whose StartElement was most recently returned, stepping over any
// nested elements on the way. It returns the first error reported by Token.
func (x *XMLParser) Skip() error {
	for nested := 0; ; {
		kind, _, err := x.Token()
		if err != nil {
			return err
		}

		if kind == StartElement {
			// Entering a child element.
			nested++
			continue
		}

		if kind == EndElement {
			if nested == 0 {
				// This end tag belongs to the element being skipped.
				return nil
			}
			nested--
		}
	}
}
// scratch is a reusable, growable byte buffer, taken from
// https://github.com/bcicen/jstream
//
// The zero value is ready to use; storage is allocated on the first add.
type scratch struct {
	data []byte // backing storage; only data[:fill] holds written bytes
	fill int    // number of bytes written since the last reset
}

// reset discards the written contents while keeping the backing storage.
func (s *scratch) reset() { s.fill = 0 }

// bytes returns the written contents of the scratch buffer. The returned
// slice aliases the internal storage, so copy it (e.g. via string(...))
// before the buffer is reset and written again.
func (s *scratch) bytes() []byte { return s.data[:s.fill] }

// grow replaces the backing storage with one of twice the length (or a small
// minimum when the buffer is empty), preserving the written bytes.
func (s *scratch) grow() {
	// Doubling a zero-length buffer would stay at zero, so enforce a floor.
	const minSize = 64

	size := 2 * len(s.data)
	if size < minSize {
		size = minSize
	}
	ndata := make([]byte, size)
	copy(ndata, s.data[:s.fill])
	s.data = ndata
}

// add appends a single byte to the scratch buffer, growing it when full.
func (s *scratch) add(c byte) {
	// Compare against len, not cap: data is indexed directly, so only the
	// first len(data) positions are addressable.
	if s.fill >= len(s.data) {
		s.grow()
	}
	s.data[s.fill] = c
	s.fill++
}