From 03d66e3b6aec19a7fe443854382f3e766332e048 Mon Sep 17 00:00:00 2001 From: Azzaare Date: Thu, 5 Sep 2024 11:23:49 +0900 Subject: [PATCH 1/2] Add a JSON extension --- Project.toml | 7 +++++ ext/XMLJSONExt.jl | 58 +++++++++++++++++++++++++++++++++++++ src/XML.jl | 68 ++++++++++++++++++++++++-------------------- test/JSONExt.jl | 8 ++++++ test/Project.toml | 1 + test/data/toJSON.xml | 23 +++++++++++++++ test/runtests.jl | 30 ++++++++++--------- 7 files changed, 150 insertions(+), 45 deletions(-) create mode 100644 ext/XMLJSONExt.jl create mode 100644 test/JSONExt.jl create mode 100644 test/data/toJSON.xml diff --git a/Project.toml b/Project.toml index e03f58b..d7a0238 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,13 @@ version = "0.3.4" Mmap = "a63ad114-7e13-5084-954f-fe012c677804" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +[weakdeps] +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" + +[extensions] +XMLJSONExt = ["JSON"] + [compat] +JSON = "0.21" OrderedCollections = "1.4, 1.5" julia = "1.6" diff --git a/ext/XMLJSONExt.jl b/ext/XMLJSONExt.jl new file mode 100644 index 0000000..a3157f7 --- /dev/null +++ b/ext/XMLJSONExt.jl @@ -0,0 +1,58 @@ +module XMLJSONExt + +using JSON +using OrderedCollections +using XML + +function XML.xml2dicts(node::Node) + if nodetype(node) == XML.Document + # root node has no tag and 1 child, so it is special, just apply to its child + return XML.xml2dicts(only(node.children)) + elseif nodetype(node) == XML.Text + # text nodes have no tag, and just have contents + return OrderedDict("_" => node.value) + elseif nodetype(node) == XML.Element + # normal case + dict = OrderedDict{String,Any}() + # first put in the attributes + if !isnothing(attributes(node)) + merge!(dict, attributes(node)) + end + # then any children + for child in children(node) + child_result = XML.xml2dicts(child) + for (key, value) in child_result + if haskey(dict, key) + if isa(dict[key], Vector) + push!(dict[key], value) + else + dict[key] = 
[dict[key], value] + end + else + dict[key] = value + end + end + end + return OrderedDict(tag(node) => dict) + else + throw(DomainError(nodetype(node), "unsupported node type")) + end +end + + + +function XML.xml2json(xml::Node, json="") + dict_result = XML.xml2dicts(xml) + + if isdir(dirname(json)) + open(json, "w") do io + JSON.print(io, dict_result, 2) + end + else + return JSON.json(dict_result) + end +end + +XML.xml2json(xml::IO, json="") = XML.xml2json(read(xml, String), json) + +end # module diff --git a/src/XML.jl b/src/XML.jl index 942599f..4159278 100644 --- a/src/XML.jl +++ b/src/XML.jl @@ -9,7 +9,9 @@ export # Interface: children, nodetype, tag, attributes, value, is_simple, simplevalue, simple_value, # Extended Interface for LazyNode: - parent, depth, next, prev + parent, depth, next, prev, + # Extension XMLJSONExt: + xml2dicts, xml2json #-----------------------------------------------------------------------------# escape/unescape const escape_chars = ('&' => "&", '<' => "<", '>' => ">", "'" => "'", '"' => """) @@ -69,9 +71,9 @@ A Lazy representation of an XML node. """ mutable struct LazyNode <: AbstractXMLNode raw::Raw - tag::Union{Nothing, String} - attributes::Union{Nothing, OrderedDict{String, String}} - value::Union{Nothing, String} + tag::Union{Nothing,String} + attributes::Union{Nothing,OrderedDict{String,String}} + value::Union{Nothing,String} end LazyNode(raw::Raw) = LazyNode(raw, nothing, nothing, nothing) @@ -126,10 +128,10 @@ A representation of an XML DOM node. 
For simpler construction, use `(::NodeType """ struct Node <: AbstractXMLNode nodetype::NodeType - tag::Union{Nothing, String} - attributes::Union{Nothing, OrderedDict{String, String}} - value::Union{Nothing, String} - children::Union{Nothing, Vector{Node}} + tag::Union{Nothing,String} + attributes::Union{Nothing,OrderedDict{String,String}} + value::Union{Nothing,String} + children::Union{Nothing,Vector{Node}} function Node(nodetype::NodeType, tag=nothing, attributes=nothing, value=nothing, children=nothing) new(nodetype, @@ -137,22 +139,22 @@ struct Node <: AbstractXMLNode isnothing(attributes) ? nothing : OrderedDict(string(k) => string(v) for (k, v) in pairs(attributes)), isnothing(value) ? nothing : string(value), isnothing(children) ? nothing : - children isa Node ? [children] : - children isa Vector{Node} ? children : - children isa Vector ? map(Node, children) : - children isa Tuple ? map(Node, collect(children)) : - [Node(children)] + children isa Node ? [children] : + children isa Vector{Node} ? children : + children isa Vector ? map(Node, children) : + children isa Tuple ? map(Node, collect(children)) : + [Node(children)] ) end end function Node(o::Node, x...; kw...) attrs = !isnothing(kw) ? - merge( - OrderedDict(string(k) => string(v) for (k,v) in pairs(kw)), - isnothing(o.attributes) ? OrderedDict{String, String}() : o.attributes - ) : - o.attributes + merge( + OrderedDict(string(k) => string(v) for (k, v) in pairs(kw)), + isnothing(o.attributes) ? OrderedDict{String,String}() : o.attributes + ) : + o.attributes children = isempty(x) ? o.children : vcat(isnothing(o.children) ? [] : o.children, collect(x)) Node(o.nodetype, o.tag, attrs, o.value, children) end @@ -171,7 +173,7 @@ Node(data::Raw) = Node(LazyNode(data)) # Anything that's not Vector{UInt8} or a (Lazy)Node is converted to a Text Node Node(x) = Node(Text, nothing, nothing, string(x), nothing) -h(tag::Union{Symbol, String}, children...; kw...) 
= Node(Element, tag, kw, nothing, children) +h(tag::Union{Symbol,String}, children...; kw...) = Node(Element, tag, kw, nothing, children) Base.getproperty(::typeof(h), tag::Symbol) = h(tag) (o::Node)(children...; kw...) = Node(o, Node.(children)...; kw...) @@ -261,7 +263,7 @@ next(o) = missing prev(o) = missing is_simple(o) = nodetype(o) == Element && (isnothing(attributes(o)) || isempty(attributes(o))) && - length(children(o)) == 1 && nodetype(only(o)) in (Text, CData) + length(children(o)) == 1 && nodetype(only(o)) in (Text, CData) simple_value(o) = is_simple(o) ? value(only(o)) : error("`XML.simple_value` is only defined for simple nodes.") @@ -274,22 +276,22 @@ function nodes_equal(a, b) out &= XML.attributes(a) == XML.attributes(b) out &= XML.value(a) == XML.value(b) out &= length(XML.children(a)) == length(XML.children(b)) - out &= all(nodes_equal(ai, bi) for (ai,bi) in zip(XML.children(a), XML.children(b))) + out &= all(nodes_equal(ai, bi) for (ai, bi) in zip(XML.children(a), XML.children(b))) return out end Base.:(==)(a::AbstractXMLNode, b::AbstractXMLNode) = nodes_equal(a, b) #-----------------------------------------------------------------------------# parse -Base.parse(::Type{T}, str::AbstractString) where {T <: AbstractXMLNode} = parse(str, T) +Base.parse(::Type{T}, str::AbstractString) where {T<:AbstractXMLNode} = parse(str, T) #-----------------------------------------------------------------------------# indexing -Base.getindex(o::Union{Raw, AbstractXMLNode}) = o -Base.getindex(o::Union{Raw, AbstractXMLNode}, i::Integer) = children(o)[i] -Base.getindex(o::Union{Raw, AbstractXMLNode}, ::Colon) = children(o) -Base.lastindex(o::Union{Raw, AbstractXMLNode}) = lastindex(children(o)) +Base.getindex(o::Union{Raw,AbstractXMLNode}) = o +Base.getindex(o::Union{Raw,AbstractXMLNode}, i::Integer) = children(o)[i] +Base.getindex(o::Union{Raw,AbstractXMLNode}, ::Colon) = children(o) +Base.lastindex(o::Union{Raw,AbstractXMLNode}) = lastindex(children(o)) 
-Base.only(o::Union{Raw, AbstractXMLNode}) = only(children(o)) +Base.only(o::Union{Raw,AbstractXMLNode}) = only(children(o)) Base.length(o::AbstractXMLNode) = length(children(o)) @@ -338,7 +340,7 @@ end function _print_attrs(io::IO, o; color=:normal) attr = attributes(o) isnothing(attr) && return nothing - for (k,v) in attr + for (k, v) in attr # printstyled(io, ' ', k, '=', '"', v, '"'; color) print(io, ' ', k, '=', '"', v, '"') end @@ -356,13 +358,13 @@ write(x; kw...) = (io = IOBuffer(); write(io, x; kw...); String(take!(io))) write(filename::AbstractString, x; kw...) = open(io -> write(io, x; kw...), filename, "w") function write(io::IO, x; indentsize::Int=2, depth::Int=depth(x)) - indent = ' ' ^ indentsize + indent = ' '^indentsize nodetype = XML.nodetype(x) tag = XML.tag(x) value = XML.value(x) children = XML.children(x) - padding = indent ^ max(0, depth - 1) + padding = indent^max(0, depth - 1) print(io, padding) if nodetype === Text print(io, value) @@ -377,7 +379,7 @@ function write(io::IO, x; indentsize::Int=2, depth::Int=depth(x)) else println(io) foreach(children) do child - write(io, child; indentsize, depth = depth + 1) + write(io, child; indentsize, depth=depth + 1) println(io) end print(io, padding, "') @@ -407,4 +409,8 @@ function write(io::IO, x; indentsize::Int=2, depth::Int=depth(x)) end end +# Extension XMLJSONExt +function xml2dicts end +function xml2json end + end diff --git a/test/JSONExt.jl b/test/JSONExt.jl new file mode 100644 index 0000000..3f8b748 --- /dev/null +++ b/test/JSONExt.jl @@ -0,0 +1,8 @@ +using JSON + +@testset "XML to JSON" begin + xml = read("data/toJSON.xml", Node) + json = xml2json(xml) + d = xml2dicts(xml) + @test JSON.parse(json) == d +end diff --git a/test/Project.toml b/test/Project.toml index d4883bd..c041e46 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,4 +1,5 @@ [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +JSON = 
"682c06a0-de6a-54ab-a142-c8b1cf79cde6" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/data/toJSON.xml b/test/data/toJSON.xml new file mode 100644 index 0000000..7d9435b --- /dev/null +++ b/test/data/toJSON.xml @@ -0,0 +1,23 @@ + + + 0 1 + 0 1 + 0 1 + + + + x y + (0,0) (1,1) + + + x z + (0,0) (1,1) + + + y z + (0,1) (1,0) + + + \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index e3b4e29..a63e754 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -58,28 +58,28 @@ end #-----------------------------------------------------------------------------# Raw @testset "Raw tag/attributes/value" begin examples = [ - (xml = "", - nodetype = DTD, + (xml="", + nodetype=DTD, tag=nothing, attributes=nothing, value="html"), - (xml = "", - nodetype = Declaration, + (xml="", + nodetype=Declaration, tag=nothing, attributes=Dict("version" => "1.0", "key" => "value"), value=nothing), - (xml = "", - nodetype = Element, + (xml="", + nodetype=Element, tag="tag", attributes=Dict("_id" => "1", "x" => "abc"), value=nothing), - (xml = "", - nodetype = Comment, + (xml="", + nodetype=Comment, tag=nothing, attributes=nothing, value=" comment "), - (xml = "", - nodetype = CData, + (xml="", + nodetype=CData, tag=nothing, attributes=nothing, value="cdata test"), @@ -129,7 +129,7 @@ end idx = findall(next_res .!= prev_res) - for (a,b) in zip(next_res, prev_res) + for (a, b) in zip(next_res, prev_res) @test a == b end end @@ -172,7 +172,7 @@ end @test node == node2 #For debugging: - for (a,b) in zip(AbstractTrees.Leaves(node), AbstractTrees.Leaves(node2)) + for (a, b) in zip(AbstractTrees.Leaves(node), AbstractTrees.Leaves(node2)) if a != b @info path @info a @@ -192,7 +192,7 @@ end ProcessingInstruction("xml-stylesheet", href="mystyle.css", type="text/css"), Element("root_tag", CData("cdata"), Text("text")) ) - @test map(nodetype, children(doc)) == [DTD,Declaration,Comment,ProcessingInstruction,Element] + @test map(nodetype, children(doc)) == [DTD, 
Declaration, Comment, ProcessingInstruction, Element] @test length(children(doc[end])) == 2 @test nodetype(doc[end][1]) == XML.CData @test nodetype(doc[end][2]) == XML.Text @@ -221,6 +221,8 @@ end # https://github.com/JuliaComputing/XML.jl/issues/14 (Sorted Attributes) kw = NamedTuple(OrderedDict(Symbol(k) => Int(k) for k in 'a':'z')) - xyz = XML.Element("point"; kw...) + xyz = XML.Element("point"; kw...) @test collect(keys(attributes(xyz))) == string.(collect('a':'z')) end + +include("JSONExt.jl") From 2e18a90e07317cef12e87aff1de83dbb616922e1 Mon Sep 17 00:00:00 2001 From: Azzaare Date: Thu, 5 Sep 2024 11:54:55 +0900 Subject: [PATCH 2/2] Fix test to ignore XMLJSONExt for julia version 1.9 --- Project.toml | 15 +++++++++------ ext/XMLJSONExt.jl | 2 +- test/runtests.jl | 4 +++- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/Project.toml b/Project.toml index d7a0238..ee73597 100644 --- a/Project.toml +++ b/Project.toml @@ -7,13 +7,16 @@ version = "0.3.4" Mmap = "a63ad114-7e13-5084-954f-fe012c677804" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -[weakdeps] -JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +[compat] +JSON = "0.21" +OrderedCollections = "1.4, 1.5, 1.6" +julia = "1.6" [extensions] XMLJSONExt = ["JSON"] -[compat] -JSON = "0.21" -OrderedCollections = "1.4, 1.5" -julia = "1.6" +[extras] +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" + +[weakdeps] +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" diff --git a/ext/XMLJSONExt.jl b/ext/XMLJSONExt.jl index a3157f7..510ba0f 100644 --- a/ext/XMLJSONExt.jl +++ b/ext/XMLJSONExt.jl @@ -1,6 +1,6 @@ module XMLJSONExt -using JSON +isdefined(Base, :get_extension) ? 
(using JSON) : (using ..JSON) using OrderedCollections using XML diff --git a/test/runtests.jl b/test/runtests.jl index a63e754..8289357 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -225,4 +225,6 @@ end @test collect(keys(attributes(xyz))) == string.(collect('a':'z')) end -include("JSONExt.jl") +if isdefined(Base, :get_extension) + include("JSONExt.jl") +end