faster xml parser

This commit is contained in:
Robert Janetzko 2022-04-19 18:32:20 +00:00
parent d29ab0dce1
commit 767216b2cc
5 changed files with 11921 additions and 13536 deletions

View File

@ -25,8 +25,7 @@ var backendTemplate = template.Must(template.New("").Funcs(template.FuncMap{
package model package model
import ( import (
"encoding/xml" "github.com/robertjanetzko/LegendsBrowser2/backend/util"
"strconv"
"fmt" "fmt"
"encoding/json" "encoding/json"
) )
@ -140,19 +139,13 @@ func (x *{{ $obj.Name }}) CheckFields() {
// Parser // Parser
func n(d []byte) int {
v, _ := strconv.Atoi(string(d))
return v
}
{{- range $name, $obj := $.Objects }} {{- range $name, $obj := $.Objects }}
{{- range $plus := $.Modes }} {{- range $plus := $.Modes }}
func parse{{ $obj.Name }}{{ if $plus }}Plus{{ end }}(d *xml.Decoder, start *xml.StartElement{{ if $plus }}, obj *{{ $obj.Name }}{{ end }}) (*{{ $obj.Name }}, error) { func parse{{ $obj.Name }}{{ if $plus }}Plus{{ end }}(p *util.XMLParser{{ if $plus }}, obj *{{ $obj.Name }}{{ end }}) (*{{ $obj.Name }}, error) {
var ( var (
{{- if not $plus }} {{- if not $plus }}
obj = &{{ $obj.Name }}{} obj = &{{ $obj.Name }}{}
{{- end }} {{- end }}
data []byte
) )
{{- if $plus }} {{- if $plus }}
if obj == nil { if obj == nil {
@ -165,62 +158,44 @@ func parse{{ $obj.Name }}{{ if $plus }}Plus{{ end }}(d *xml.Decoder, start *xml.
{{- end }} {{- end }}
for { for {
tok, err := d.Token() t, n, err := p.Token()
if err != nil { if err != nil {
return nil, err return nil, err
} }
switch t := tok.(type) { switch t {
case xml.StartElement: case util.StartElement:
switch t.Name.Local { switch n {
{{- range $fname, $field := $obj.Fields }} {{- range $fname, $field := $obj.Fields }}
{{- if $field.Active $plus }} {{- if $field.Active $plus }}
case "{{ $fname }}":
{{ $field.StartAction $plus }}
{{- end }}
{{- end }}
default:
// fmt.Println("unknown field", t.Name.Local)
d.Skip()
}
case xml.CharData:
data = append(data, t...)
case xml.EndElement:
if t.Name.Local == start.Name.Local {
obj.CheckFields()
return obj, nil
}
switch t.Name.Local {
{{- range $fname, $field := $obj.Fields }}{{- if $field.Active $plus }}
case "{{ $fname }}": case "{{ $fname }}":
{{- if and (eq $fname "type") (not (not $obj.SubTypes)) }} {{- if and (eq $fname "type") (not (not $obj.SubTypes)) }}
data, err := p.Value()
var err error if err != nil {
switch string(data) { return nil, err
}
switch data {
{{- range $sub := ($obj.ActiveSubTypes $plus) }} {{- range $sub := ($obj.ActiveSubTypes $plus) }}
case "{{ $sub.Case }}": case "{{ $sub.Case }}":
{{- if eq 1 (len $sub.Options) }} {{- if eq 1 (len $sub.Options) }}
{{- if not $plus }} {{- if not $plus }}
obj.Details, err = parse{{ $sub.Name }}(d, start) obj.Details, err = parse{{ $sub.Name }}(p)
{{- else }} {{- else }}
obj.Details, err = parse{{ $sub.Name }}Plus(d, start, obj.Details.(*{{ $sub.Name }})) obj.Details, err = parse{{ $sub.Name }}Plus(p, obj.Details.(*{{ $sub.Name }}))
{{- end }} {{- end }}
{{- else }} {{- else }}
switch details := obj.Details.(type) { switch details := obj.Details.(type) {
{{- range $opt := $sub.Options }} {{- range $opt := $sub.Options }}
case *{{ $opt}}: case *{{ $opt}}:
obj.Details, err = parse{{ $opt }}Plus(d, start, details) obj.Details, err = parse{{ $opt }}Plus(p, details)
{{- end }} {{- end }}
default: default:
fmt.Println("unknown subtype option", obj.Details) fmt.Println("unknown subtype option", obj.Details)
d.Skip() p.Skip()
} }
{{- end }} {{- end }}
{{- end }} {{- end }}
default: default:
d.Skip() p.Skip()
} }
if err != nil { if err != nil {
return nil, err return nil, err
@ -228,12 +203,18 @@ func parse{{ $obj.Name }}{{ if $plus }}Plus{{ end }}(d *xml.Decoder, start *xml.
return obj, nil return obj, nil
{{- else }} {{- else }}
{{ $field.EndAction $obj }} {{ $field.StartAction $obj $plus }}
{{- end }}
{{- end }}
{{- end }} {{- end }}
{{- end }}{{- end }}
default: default:
// fmt.Println("unknown field", t.Name.Local) // fmt.Println("unknown field", n)
p.Skip()
} }
case util.EndElement:
obj.CheckFields()
return obj, nil
} }
} }
} }
@ -322,7 +303,7 @@ func (f Field) Init(plus bool) string {
return "" return ""
} }
func (f Field) StartAction(plus bool) string { func (f Field) StartAction(obj Object, plus bool) string {
n := f.Name n := f.Name
if n == "Id" || n == "Name" { if n == "Id" || n == "Name" {
@ -332,9 +313,9 @@ func (f Field) StartAction(plus bool) string {
if f.Type == "object" { if f.Type == "object" {
var p string var p string
if !plus { if !plus {
p = fmt.Sprintf("v, _ := parse%s(d, &t)", *f.ElementType) p = fmt.Sprintf("v, _ := parse%s(p)", *f.ElementType)
} else { } else {
p = fmt.Sprintf("v, _ := parse%sPlus(d, &t, &%s{})", *f.ElementType, *f.ElementType) p = fmt.Sprintf("v, _ := parse%sPlus(p, &%s{})", *f.ElementType, *f.ElementType)
} }
if !f.Multiple { if !f.Multiple {
return fmt.Sprintf("%s\nobj.%s = v", p, n) return fmt.Sprintf("%s\nobj.%s = v", p, n)
@ -347,21 +328,50 @@ func (f Field) StartAction(plus bool) string {
gen := fmt.Sprintf("parse%s", *f.ElementType) gen := fmt.Sprintf("parse%s", *f.ElementType)
if f.Type == "array" { if f.Type == "array" {
return fmt.Sprintf("parseArray(d, &obj.%s, %s)", f.Name, gen) return fmt.Sprintf("parseArray(p, &obj.%s, %s)", f.Name, gen)
} }
if f.Type == "map" { if f.Type == "map" {
if !plus { if !plus {
return fmt.Sprintf("parseMap(d, &obj.%s, %s)", f.Name, gen) return fmt.Sprintf("parseMap(p, &obj.%s, %s)", f.Name, gen)
} else { } else {
gen = fmt.Sprintf("parse%sPlus", *f.ElementType) gen = fmt.Sprintf("parse%sPlus", *f.ElementType)
return fmt.Sprintf("parseMapPlus(d, &obj.%s, %s)", f.Name, gen) return fmt.Sprintf("parseMapPlus(p, &obj.%s, %s)", f.Name, gen)
} }
} }
} }
if f.Type == "int" || f.Type == "string" || f.Type == "bool" || f.Type == "enum" { if f.Type == "int" || f.Type == "string" || f.Type == "bool" || f.Type == "enum" {
return "data = nil" n := f.Name
if n == "Id" || n == "Name" {
n = n + "_"
} else {
n = f.CorrectedName(obj)
}
s := "data, err := p.Value()\nif err != nil { return nil, err }\n"
if !f.Multiple {
if f.Type == "int" {
return fmt.Sprintf("%sobj.%s = num(data)", s, n)
} else if f.Type == "string" {
return fmt.Sprintf("%sobj.%s = string(data)", s, n)
} else if f.Type == "bool" {
s := "_, err := p.Value()\nif err != nil { return nil, err }\n"
return fmt.Sprintf("%sobj.%s = true", s, n)
} else if f.Type == "enum" {
return fmt.Sprintf("%sobj.%s = parse%s%s(string(data))", s, n, obj.Name, n)
}
} else {
if f.Type == "int" {
return fmt.Sprintf("%sobj.%s = append(obj.%s, num(data))", s, n, n)
} else if f.Type == "string" {
return fmt.Sprintf("%sobj.%s = append(obj.%s, string(data))", s, n, n)
} else if f.Type == "enum" {
return fmt.Sprintf("%sobj.%s = append(obj.%s, parse%s%s(string(data)))", s, n, n, obj.Name, n)
}
}
} }
return "" return ""

View File

@ -116,7 +116,7 @@ func main() {
t := templates.New(functions) t := templates.New(functions)
if len(*f) > 0 { if len(*f) > 0 {
defer profile.Start(profile.MemProfile).Stop() defer profile.Start(profile.ProfilePath(".")).Stop()
go func() { go func() {
http.ListenAndServe(":8081", nil) http.ListenAndServe(":8081", nil)
}() }()

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
package model package model
import ( import (
"bufio"
"encoding/json" "encoding/json"
"encoding/xml" "encoding/xml"
"fmt" "fmt"
@ -39,10 +40,32 @@ func NewLegendsDecoder(file string) (*xml.Decoder, *os.File, *pb.ProgressBar, er
return d, xmlFile, bar, err return d, xmlFile, bar, err
} }
// NewLegendsParser opens the legends XML file at path file and returns a
// util.XMLParser reading from it.
//
// The file is read through util.NewConvertReader, a progress-bar proxy
// reader sized to the file's length, and a bufio.Reader, in that order.
// The opened file and the started progress bar are returned alongside the
// parser.
// NOTE(review): presumably the caller closes the file and finishes the bar;
// confirm against the call site.
//
// If the file cannot be stat'ed or opened, the error is returned and all
// other results are nil; no progress bar is started in that case.
func NewLegendsParser(file string) (*util.XMLParser, *os.File, *pb.ProgressBar, error) {
	fi, err := os.Stat(file)
	if err != nil {
		return nil, nil, nil, err
	}

	// Open before starting the progress bar so a failure does not leave a
	// running bar behind or hand a nil file to the reader chain.
	xmlFile, err := os.Open(file)
	if err != nil {
		return nil, nil, nil, err
	}

	bar := pb.Full.Start64(fi.Size())
	fmt.Println("Successfully Opened", file)

	converter := util.NewConvertReader(xmlFile)
	barReader := bar.NewProxyReader(converter)
	return util.NewXMLParser(bufio.NewReader(barReader)), xmlFile, bar, nil
}
func Parse(file string) (*DfWorld, error) { func Parse(file string) (*DfWorld, error) {
InitSameFields() InitSameFields()
d, xmlFile, bar, err := NewLegendsDecoder(file) p, xmlFile, bar, err := NewLegendsParser(file)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -50,14 +73,14 @@ func Parse(file string) (*DfWorld, error) {
BaseLoop: BaseLoop:
for { for {
tok, err := d.Token() t, n, err := p.Token()
if err != nil { if err != nil {
return nil, err return nil, err
} }
switch t := tok.(type) { switch t {
case xml.StartElement: case util.StartElement:
if t.Name.Local == "df_world" { if n == "df_world" {
world, err = parseDfWorld(d, &t) world, err = parseDfWorld(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -73,7 +96,7 @@ BaseLoop:
if plus { if plus {
file = strings.Replace(file, "-legends.xml", "-legends_plus.xml", 1) file = strings.Replace(file, "-legends.xml", "-legends_plus.xml", 1)
d, xmlFile, bar, err := NewLegendsDecoder(file) p, xmlFile, bar, err = NewLegendsParser(file)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -81,14 +104,14 @@ BaseLoop:
PlusLoop: PlusLoop:
for { for {
tok, err := d.Token() t, n, err := p.Token()
if err != nil { if err != nil {
return nil, err return nil, err
} }
switch t := tok.(type) { switch t {
case xml.StartElement: case util.StartElement:
if t.Name.Local == "df_world" { if n == "df_world" {
world, err = parseDfWorldPlus(d, &t, world) world, err = parseDfWorldPlus(p, world)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -109,87 +132,87 @@ BaseLoop:
return world, nil return world, nil
} }
func parseArray[T any](d *xml.Decoder, dest *[]T, creator func(*xml.Decoder, *xml.StartElement) (T, error)) { func parseArray[T any](p *util.XMLParser, dest *[]T, creator func(*util.XMLParser) (T, error)) {
for { for {
tok, err := d.Token() t, _, err := p.Token()
if err != nil { if err != nil {
return // nil, err return // nil, err
} }
switch t := tok.(type) { switch t {
case xml.StartElement: case util.StartElement:
x, _ := creator(d, &t) x, _ := creator(p)
*dest = append(*dest, x) *dest = append(*dest, x)
case xml.EndElement: case util.EndElement:
return return
} }
} }
} }
func parseMap[T Identifiable](d *xml.Decoder, dest *map[int]T, creator func(*xml.Decoder, *xml.StartElement) (T, error)) { func parseMap[T Identifiable](p *util.XMLParser, dest *map[int]T, creator func(*util.XMLParser) (T, error)) {
for { for {
tok, err := d.Token() t, _, err := p.Token()
if err != nil { if err != nil {
return // nil, err return // nil, err
} }
switch t := tok.(type) { switch t {
case xml.StartElement: case util.StartElement:
x, _ := creator(d, &t) x, _ := creator(p)
(*dest)[x.Id()] = x (*dest)[x.Id()] = x
case xml.EndElement: case util.EndElement:
return return
} }
} }
} }
func parseMapPlus[T Identifiable](d *xml.Decoder, dest *map[int]T, creator func(*xml.Decoder, *xml.StartElement, T) (T, error)) { func parseMapPlus[T Identifiable](p *util.XMLParser, dest *map[int]T, creator func(*util.XMLParser, T) (T, error)) {
for { for {
tok, err := d.Token() t, _, err := p.Token()
if err != nil { if err != nil {
return return
} }
switch t := tok.(type) { switch t {
case xml.StartElement: case util.StartElement:
id, err := parseId(d) id, err := parseId(p)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
x, err := creator(d, &t, (*dest)[id]) x, err := creator(p, (*dest)[id])
if err != nil { if err != nil {
return return
} }
(*dest)[id] = x (*dest)[id] = x
case xml.EndElement: case util.EndElement:
return return
} }
} }
} }
func parseId(d *xml.Decoder) (int, error) { func parseId(p *util.XMLParser) (int, error) {
var data []byte
for { for {
tok, err := d.Token() t, n, err := p.Token()
if err != nil { if err != nil {
return -1, err return -1, err
} }
switch t := tok.(type) { switch t {
case xml.StartElement: case util.StartElement:
data = nil if n == "id" {
if t.Name.Local != "id" { s, err := p.Value()
d.Skip() if err != nil {
// return -1, fmt.Errorf("expected id at: %d", d.InputOffset()) return -1, err
}
return strconv.Atoi(s)
} else {
p.Skip()
} }
case xml.CharData:
data = append(data, t...)
case xml.EndElement:
if t.Name.Local == "id" {
return strconv.Atoi(string(data))
} }
} }
} }
func num(s string) int {
v, _ := strconv.Atoi(s)
return v
} }
var sameFields map[string]map[string]map[string]bool var sameFields map[string]map[string]map[string]bool

164
backend/util/xml.go Normal file
View File

@ -0,0 +1,164 @@
package util
import (
"bufio"
)
// XMLParser is a minimal, byte-oriented XML tokenizer. It only recognizes
// tags (the text between '<' and '>') and the character data read by Value;
// it does not split off attributes or decode entities.
type XMLParser struct {
// reader is the byte source every method reads from.
reader *bufio.Reader
// scratch is the reusable buffer in which Token collects the tag text and
// Value collects character data.
scratch *scratch
// selfClose is set by Token after it reports a self-closing tag; the next
// Token call then returns the matching EndElement, and the next Value call
// returns an empty string. Either call clears the flag.
selfClose bool
// lastElement is the name of the most recent self-closing tag, returned
// with the synthesized EndElement.
lastElement string
}
// NewXMLParser returns an XMLParser reading from r. The parser starts with
// a 1024-byte scratch buffer and immediately consumes the input up to and
// including the first '>' (see skipDeclerations).
func NewXMLParser(r *bufio.Reader) *XMLParser {
	buf := &scratch{data: make([]byte, 1024)}
	parser := &XMLParser{reader: r, scratch: buf}

	// The constructor has no error result, so a read failure while skipping
	// the leading declaration is dropped here.
	_ = parser.skipDeclerations()

	return parser
}
// skipDeclerations discards input up to and including the first '>' byte.
// It is called once from NewXMLParser, presumably to step over a leading
// declaration such as <?xml ...?>. It returns the reader's error if the
// input ends or fails before a '>' is seen.
func (x *XMLParser) skipDeclerations() error {
	for {
		switch c, err := x.reader.ReadByte(); {
		case err != nil:
			return err
		case c == '>':
			return nil
		}
	}
}
// TokenType identifies the kind of token reported by XMLParser.Token.
type TokenType int
const (
// StartElement is reported for an opening tag and for a self-closing tag.
// It is also the zero value, which Token returns together with a non-nil
// error, so check the error before looking at the token type.
StartElement TokenType = iota
// EndElement is reported for a closing tag and, on the call following a
// self-closing tag, for that tag's synthesized end.
EndElement
)
// Token advances to the next tag in the input and reports it.
//
// The returned name is the raw text between '<' and '>' with a leading '/'
// (closing tag) or trailing '/' (self-closing tag) removed. The text is not
// split any further, so attributes, if present, remain part of the name.
// A self-closing tag such as <a/> is reported as StartElement "a"; the
// matching EndElement "a" is synthesized by the following call. Bytes
// outside of tags, including a '>' that appears in character data, are
// discarded.
//
// Any error from the underlying reader is returned unchanged together with
// a zero TokenType and an empty name.
func (x *XMLParser) Token() (TokenType, string, error) {
	if x.selfClose {
		// Deliver the end of the self-closing tag reported last time.
		x.selfClose = false
		return EndElement, x.lastElement, nil
	}

	var inTag, closing bool
	for {
		b, err := x.reader.ReadByte()
		if err != nil {
			return 0, "", err
		}

		switch {
		case b == '<':
			// A new tag begins; forget whatever was collected before.
			inTag, closing = true, false
			x.scratch.reset()

		case !inTag:
			// Character data between tags is not part of any token. This
			// also covers a literal '>' in text, which must not be taken
			// for the end of a tag.

		case b == '>':
			name := x.scratch.bytes()
			// Guard the length: an empty tag such as "</>" must not index
			// past the start of the buffer.
			if n := len(name); n > 0 && name[n-1] == '/' {
				x.selfClose = true
				x.lastElement = string(name[:n-1])
				return StartElement, x.lastElement, nil
			}
			if closing {
				return EndElement, string(name), nil
			}
			return StartElement, string(name), nil

		case b == '/' && !closing && len(x.scratch.bytes()) == 0:
			// A '/' directly after '<' marks a closing tag.
			closing = true

		default:
			x.scratch.add(b)
		}
	}
}
// Value returns the character data that follows the most recently reported
// start tag and consumes the input through the '>' of the next tag. That
// tag is presumably the element's closing tag; its name is not inspected.
//
// The bytes are returned verbatim (no entity decoding or trimming). If the
// previous token was a self-closing tag, Value returns an empty string
// without reading and clears the pending synthesized end element.
//
// Any error from the underlying reader is returned unchanged together with
// an empty string.
func (x *XMLParser) Value() (string, error) {
	if x.selfClose {
		x.selfClose = false
		return "", nil
	}

	x.scratch.reset()
	sawTagOpen := false
	for {
		c, err := x.reader.ReadByte()
		if err != nil {
			return "", err
		}

		switch {
		case c == '<':
			// The text ends here; everything up to '>' is the next tag.
			sawTagOpen = true
		case !sawTagOpen:
			x.scratch.add(c)
		case c == '>':
			return string(x.scratch.bytes()), nil
		}
	}
}
// Skip discards the remainder of the element whose start tag was just
// reported, including any nested elements, and returns after consuming the
// end token that balances it. Errors from Token are returned unchanged.
func (x *XMLParser) Skip() error {
	// open counts the elements still waiting for their end token; the
	// element being skipped is the first one.
	open := 1
	for open > 0 {
		kind, _, err := x.Token()
		if err != nil {
			return err
		}
		switch kind {
		case StartElement:
			open++
		case EndElement:
			open--
		}
	}
	return nil
}
// scratch is a reusable, growable byte buffer. Bytes are appended with add
// and read back with bytes; reset empties it without releasing storage.
//
// Taken from https://github.com/bcicen/jstream
type scratch struct {
	data []byte // backing storage; len(data) is the usable size
	fill int    // number of bytes written since the last reset
}

// reset empties the scratch buffer while keeping its storage for reuse.
func (s *scratch) reset() { s.fill = 0 }

// bytes returns the written contents of the scratch buffer. The returned
// slice aliases the buffer and is overwritten by later add calls.
func (s *scratch) bytes() []byte { return s.data[0:s.fill] }

// grow doubles the usable size of the scratch buffer, preserving its
// contents. An empty buffer is given a small initial size, since doubling
// zero would never make room.
func (s *scratch) grow() {
	size := 2 * len(s.data)
	if size == 0 {
		size = 64
	}
	ndata := make([]byte, size)
	copy(ndata, s.data)
	s.data = ndata
}

// add appends a single byte to the scratch buffer, growing it first when
// it is full. The check uses len rather than cap because the write below
// indexes into data and is therefore bounded by its length.
func (s *scratch) add(c byte) {
	if s.fill >= len(s.data) {
		s.grow()
	}
	s.data[s.fill] = c
	s.fill++
}