Thursday, July 08, 2010

Parsing XML Into T-SQL Tables

Ok, some heavy SQLXML work here...

The aim was to make a query which would take any XML and list, in one table, all the elements, their attributes and their element and attribute values of the immediate child nodes of the root node. On top of that, each immediate child of the root had to be enumerated so it could be referenced properly when being inserted into another table/s.

Example input:

<root>
<date>07/07/10</date>
<start>07/02/10</start>
<m code="AL">
<Time>10:22</Time>
<Ask>1943.25</Ask>
<Open>1940.00</Open>
</m>
<m code="AA">
<Time>14:42</Time>
<Ask>143.05</Ask>
<Open>230.50</Open>
</m>
</root>

Example output:

ElementIndexParentElementFieldNameNodeTypeFieldValue
1mTimeelement10:22
1mAskelement1943.25
1mOpenelement1940.00
1mcodeattributeAL
2mTimeelement14:42
2mAskelement143.05
2mOpenelement230.50
2mcodeattributeAA

And finally, the SQL...

declare @xml XML
set @xml = 'YOUR XML HERE'

create table #t
(
rownum int,
xmlextract xml
)

-- populate the table with the child nodes of the root, if they have their own sub nodes
insert into #t
SELECT
row_number() over(order by getdate()) as 'RowNum',
pref.query('.') as XmlExtract
FROM
@xml.nodes('/*/*') AS extract(pref)
WHERE
pref.value('./*[1]', 'nvarchar(50)') IS NOT NULL

SELECT distinct rownum 'ElementIndex', ParentName 'ParentElement', FieldName, NodeType, FieldValue FROM
(
-- selects the attributes and their values from the xml
SELECT DISTINCT
rownum,
SubEl.SubElem.value('local-name(..)', 'nvarchar(50)') AS ParentName,
SubEl.SubElem.value('local-name(.)', 'nvarchar(50)') AS FieldName,
'attribute' as NodeType,
SubEl.SubElem.value('.', 'nvarchar(150)') AS FieldValue
FROM
#t
CROSS APPLY
xmlextract.nodes('//@*') AS SubEl(SubElem)
UNION
-- selects the inner element and their values from the xml
SELECT
rownum,
SubEl.SubElem.value('local-name(..)', 'nvarchar(50)') AS ParentName,
SubEl.SubElem.value('local-name(.)', 'nvarchar(50)') AS FieldName,
'element' as NodeType,
SubEl.SubElem.value('.', 'nvarchar(150)') AS FieldValue
FROM
#t
CROSS APPLY
xmlextract.nodes('/*/*') AS SubEl(SubElem)
) as tmp
ORDER BY ElementIndex, ParentElement, FieldName

drop table #t

However, for performance and tidy-up reasons, when using this code in a stored procedure, you may want to use a table variable. Here's the modified version:

declare @xml XML
set @xml = 'YOUR XML HERE'

declare @t table
(
rownum int,
xmlextract xml
)

-- populate the table with the child nodes of the root, if they have their own sub nodes
insert into @t
SELECT
row_number() over(order by getdate()) as 'RowNum',
pref.query('.') as XmlExtract
FROM
@xml.nodes('/*/*') AS extract(pref)
WHERE
pref.value('./*[1]', 'nvarchar(50)') IS NOT NULL

SELECT distinct rownum 'ElementIndex', ParentName 'ParentElement', FieldName, NodeType, FieldValue FROM
(
-- selects the attributes and their values from the xml
SELECT DISTINCT
rownum,
SubEl.SubElem.value('local-name(..)', 'nvarchar(50)') AS ParentName,
SubEl.SubElem.value('local-name(.)', 'nvarchar(50)') AS FieldName,
'attribute' as NodeType,
SubEl.SubElem.value('.', 'nvarchar(150)') AS FieldValue
FROM
@t
CROSS APPLY
xmlextract.nodes('//@*') AS SubEl(SubElem)
UNION
-- selects the inner element and their values from the xml
SELECT
rownum,
SubEl.SubElem.value('local-name(..)', 'nvarchar(50)') AS ParentName,
SubEl.SubElem.value('local-name(.)', 'nvarchar(50)') AS FieldName,
'element' as NodeType,
SubEl.SubElem.value('.', 'nvarchar(150)') AS FieldValue
FROM
@t
CROSS APPLY
xmlextract.nodes('/*/*') AS SubEl(SubElem)
) as tmp
ORDER BY ElementIndex, ParentElement, FieldName

References:

1 comment:

  1. Here is q LINQ-To-SQL version:

    var data = @"<root>
    <date>07/07/10</date>
    <start>07/02/10</start>
    <m code=""AL"">
    <Time>10:22</Time>
    <Ask>1943.25</Ask>
    <Open>1940.00</Open>
    </m>
    <m code=""AA"">
    <Time>14:42</Time>
    <Ask>143.05</Ask>
    <Open>230.50</Open>
    </m>
    </root>";

    var xml = XDocument.Parse(data);

    var q = xml.Root.Elements()
    .Where(e => e.Elements().Any() || e.Attributes().Any())
    .SelectMany(
    (e, i) => e.Elements().Cast<XObject>().Union(e.Attributes().Cast<XObject>()).Select(n => new { i, n }),
    (e, h) =>
    new
    {
    ElementIndex = h.i,
    ParentElement = e.Name,
    FieldName = ((h.n is XElement) ? (h.n as XElement).Name : (h.n != null ? (h.n as XAttribute).Name : null)),
    NodeType = h.n.NodeType,
    FieldValue = ((h.n is XElement) ? (h.n as XElement).Value : (h.n != null ? (h.n as XAttribute).Value : null))
    });

    Console.WriteLine(q);

    ReplyDelete