Add a WHERE condition to check an attribute value in XML nodes

Ali Soltani

I have this SQL query:

-- Sample data: a table variable with one row per process, holding the workflow
-- definition as XML together with a name and a description.
-- NOTE(review): the XML values below appear truncated (no enclosing quotes, no
-- root element and no closing tags are visible) -- confirm against the original post.
DECLARE @process TABLE(ID INT IDENTITY, workflowXML XML, Name nvarchar(250),Description nvarchar(MAX));
INSERT INTO @process(workflowXML,Name,Description) VALUES
    <Event type="start" id="StartEvent_1">
    <Task type="service" id="Task_0uurv2v">
    <Task type="user" id="Task_1yh7nak">
        <TaskUser RoleName="myFirstRole" />
        <TaskUser RoleName="mySecondRole" />
  ,'test Process 1')
    <Event type="start" id="StartEvent_1" name="Start">
    <Task type="service" id="Task_0a7vu1x">
    <Task type="user" id="Task_00ijt4n">
    <Task type="service" id="Task_1rnuz4y">
    <Task type="user" id="Task_1d4ykor">
        <TaskUser RoleName="myFirstRole" />
        <TaskUser RoleName="mySecondRole" />
    <Task type="user" id="Task_1sembw4">
        <TaskUser RoleName="myFirstRole" />
        <TaskUser RoleName="mySecondRole" />
  ,'test Process 2')
    <Event type="start" id="StartEvent_0bivq0x">
    <Task type="service" id="Task_141buye">
    <Task type="service" id="Task_1n3p00i" >
    <Task type="user" id="Task_0olxqpp">
    <Task type="user" id="Task_0zjgfkf">
            <TaskUser RoleName="mythirdRole" />
    <Task type="service" id="Task_1q71efy">
  ,'test Process 3')

-- Walks each process workflow from its start event and reports the user tasks
-- reached first, together with the path taken to reach them.
-- NOTE(review): as shown, the parentheses around the CTE bodies, the UNION ALL
-- between the anchor and recursive members of recCTE, and the FROM ( of the final
-- derived table are not visible -- they were presumably lost when the post was copied.

-- DerivedTable: one row per direct child element of the <process> node, exposing
-- the element name, its @type and @id attributes, and the element itself as XML.
WITH DerivedTable AS
    SELECT prTbl.ID AS tblID
          ,nd.value('local-name(.)','nvarchar(max)') AS NodeName
          ,nd.value('@type','nvarchar(max)') AS [Type]
          ,nd.value('@id','nvarchar(max)') AS Id
          ,nd.query('.') AS Task
          ,prTbl.Name AS [Name]
          ,prTbl.Description AS [Description]
    FROM @process AS prTbl
    CROSS APPLY prTbl.workflowXML.nodes('process') AS A(pr)
    CROSS APPLY pr.nodes('*') AS B(nd)
-- AllIncoming: one row per <incoming> child of every Task node; [Target] is its text value.
-- NOTE(review): recCTE reads x.Id from this CTE, but no Id column is selected here as shown.
,AllIncoming AS
    SELECT tblId
          ,i.value('.','nvarchar(max)') AS [Target] 
    FROM DerivedTable
    CROSS APPLY Task.nodes('Task/incoming') AS A(i)

    WHERE NodeName='Task'
-- recCTE, anchor member: the nodes with Type 'start' begin each path at Step 1.
,recCTE AS
    SELECT tblID,NodeName,[Type],Id,[Name],[Description],Task,1 AS Step,' | ' +CAST(Id AS NVARCHAR(MAX)) AS NodePath
    FROM DerivedTable 
    WHERE [Type]='start'


    -- recCTE, recursive member: step to every node whose <incoming> value matches
    -- one of the current node's <outgoing> values, appending its Id to NodePath.
    SELECT nxt.tblID,nxt.NodeName,nxt.[Type],nxt.Id,nxt.[Name],nxt.Description,nxt.Task,r.Step+1,r.NodePath + ' | ' + nxt.Id
    FROM recCTE AS r
    INNER JOIN DerivedTable AS nxt ON nxt.Id IN(SELECT x.Id 
                                                FROM AllIncoming AS x 
                                                WHERE x.[Target] IN (SELECT o.value('.','nvarchar(max)')
                                                                     FROM r.Task.nodes('*/outgoing') AS A(o)
    -- Do not continue past a 'user' node, skip nodes already on the path, and cap the depth at 10 steps.
    WHERE r.[Type]<>'user' 
      AND r.NodePath NOT LIKE '%| ' + nxt.Id + '%' 
      AND r.Step<=10 
-- Final result: per process, keep only the 'user' rows whose Step does not exceed
-- the smallest Step of any 'user' Task in that process.
-- NOTE(review): the inner SELECT shows only t.tblID although the outer query reads
-- more columns from a -- the select list looks truncated.
select a.tblID as ProcessID,[Name],[Description],a.NodePath,a.Id as TaskID
    SELECT t.tblID 
    FROM recCTE AS t
    WHERE t.[Type]='user'
        AND t.Step<=ISNULL((SELECT MIN(x.Step) FROM recCTE AS x WHERE x.tblID=t.tblID AND x.[Type]='user' AND x.NodeName='Task'),10000)  
) a

This returns the first Task nodes that can be reached from the start event (Event type="start"). NodePath describes the path from the start event to the target node. The result of this query looks like this:


I need a new query that selects from this result only the rows where RoleName

   <TaskUser RoleName="myFirstRole" />
   <TaskUser RoleName="mySecondRole" />

exists in the output of the SplitbyDelimiter function.

The SplitbyDelimiter function splits a string on a delimiter (here a comma). For example:

 -- Splits the list on ',' -- presumably returning one role name per row (confirm against the function definition).
 select * from SplitbyDelimiter('myFirstRole,mySecondRole',',')



I edited the DerivedTable part like this:

-- Attempted variant of DerivedTable that also filters on TaskUser/@RoleName.
-- NOTE(review): the parentheses around the CTE body are not visible as shown.
WITH DerivedTable AS
    SELECT prTbl.ID AS tblID
          ,nd.value('local-name(.)','nvarchar(max)') AS NodeName
          ,nd.value('@type','nvarchar(max)') AS [Type]
          ,nd.value('@id','nvarchar(max)') AS Id
          ,nd.query('.') AS Task
          ,prTbl.Name AS [Name]
          ,prTbl.Description AS [Description]
          --,t.c.value('@RoleName','nvarchar(max)') as [Role]
    FROM @process AS prTbl
    CROSS APPLY prTbl.workflowXML.nodes('process') AS A(pr)
    CROSS APPLY pr.nodes('*') AS B(nd)
    -- This APPLY starts at the document root rather than at nd, so it is not tied to
    -- the current node: every node row is paired with every TaskUser in the document.
    CROSS APPLY prTbl.workflowXML.nodes('process/Task/TaskUsers/TaskUser') AS t(c)
    -- Keep only the pairings whose RoleName is one of the values returned by SplitbyDelimiter.
    where t.c.value('@RoleName','nvarchar(max)') in (select * from SplitbyDelimiter('myFirstRole,mySecondRole',','))

But it is not correct and returns 216 records! (Reason: the existence of extra columns with different values.) I also added an inner join on the result at the end of the query, but this does not work properly either.


I used this query:

-- Second attempt: carry a [Role] column through the recursion and join the final
-- result to the role list returned by SplitbyDelimiter.
-- NOTE(review): as shown, the parentheses around the CTE bodies, the UNION ALL
-- between the anchor and recursive members of recCTE, and the FROM ( of the final
-- derived table are not visible -- presumably lost when the post was copied.

-- DerivedTable: one row per combination of a direct child element of <process>
-- and a TaskUser element of the same document.
WITH DerivedTable AS
    SELECT prTbl.ID AS tblID
          ,nd.value('local-name(.)','nvarchar(max)') AS NodeName
          ,nd.value('@type','nvarchar(max)') AS [Type]
          ,nd.value('@id','nvarchar(max)') AS Id
          ,nd.query('.') AS Task
          ,prTbl.Name AS [Name]
          ,prTbl.Description AS [Description]
          ,t.c.value('@RoleName','nvarchar(max)') as [Role]
    FROM @process AS prTbl
    CROSS APPLY prTbl.workflowXML.nodes('process') AS A(pr)
    CROSS APPLY pr.nodes('*') AS B(nd)
    -- This APPLY starts at the document root rather than at nd, so each node row is
    -- repeated once per TaskUser element in the whole document.
    CROSS APPLY prTbl.workflowXML.nodes('process/Task/TaskUsers/TaskUser') AS t(c)      
-- AllIncoming: one row per <incoming> child of every Task row; [Target] is its text value.
-- NOTE(review): recCTE reads x.Id from this CTE, but no Id column is selected here as shown.
,AllIncoming AS
    SELECT tblId
          ,i.value('.','nvarchar(max)') AS [Target] 
    FROM DerivedTable
    CROSS APPLY Task.nodes('Task/incoming') AS A(i)

    WHERE NodeName='Task'
-- recCTE, anchor member: the rows with Type 'start' begin each path at Step 1.
,recCTE AS
    SELECT tblID,NodeName,[Type],Id,[Name],[Description],[Role],Task,1 AS Step,' | ' +CAST(Id AS NVARCHAR(MAX)) AS NodePath
    FROM DerivedTable 
    WHERE [Type]='start'


    -- recCTE, recursive member: step to every row whose <incoming> value matches
    -- one of the current node's <outgoing> values, appending its Id to NodePath.
    SELECT nxt.tblID,nxt.NodeName,nxt.[Type],nxt.Id,nxt.[Name],nxt.Description,nxt.Role,nxt.Task,r.Step+1,r.NodePath + ' | ' + nxt.Id
    FROM recCTE AS r
    INNER JOIN DerivedTable AS nxt ON nxt.Id IN(SELECT x.Id 
                                                FROM AllIncoming AS x 
                                                WHERE x.[Target] IN (SELECT o.value('.','nvarchar(max)')
                                                                     FROM r.Task.nodes('*/outgoing') AS A(o)
    -- Do not continue past a 'user' node, skip nodes already on the path, and cap the depth at 10 steps.
    WHERE r.[Type]<>'user' 
      AND r.NodePath NOT LIKE '%| ' + nxt.Id + '%' 
      AND r.Step<=10 
-- Final result: the first 'user' rows per process, restricted to the requested
-- roles and collapsed with GROUP BY; records counts the rows merged into each group.
-- NOTE(review): the inner SELECT shows only t.tblID although the outer query reads
-- a.Role and other columns -- the select list looks truncated.
select a.tblID as ProcessID,[Name],[Description],a.NodePath,a.Id as TaskID,count(*) as records
    SELECT t.tblID 

    FROM recCTE AS t
    WHERE t.[Type]='user'
        AND t.Step<=ISNULL((SELECT MIN(x.Step) FROM recCTE AS x WHERE x.tblID=t.tblID AND x.[Type]='user' AND x.NodeName='Task'),10000)  
) a

-- Keep only the rows whose Role equals one of the split values (column TheField).
INNER JOIN  [dbo].[SplitbyDelimiter]('myFirstRole,mySecondRole',',') r 
    ON r.TheField = a.Role
    group by a.tblID,[Name],[Description],a.NodePath,a.Id
    ORDER BY a.tblID

This returns: result2

This query returns too many duplicate records and takes 19 seconds to run! It is not a good query.

It would be very helpful if someone could explain a solution for this query.


Your query

I used this query:

-- DerivedTable as quoted from the question: one row per combination of a direct
-- child element of <process> and a TaskUser element of the same document.
-- NOTE(review): the parentheses around the CTE body are not visible as shown.
WITH DerivedTable AS
    SELECT prTbl.ID AS tblID
          ,nd.value('local-name(.)','nvarchar(max)') AS NodeName
          ,nd.value('@type','nvarchar(max)') AS [Type]
          ,nd.value('@id','nvarchar(max)') AS Id
          ,nd.query('.') AS Task
          ,prTbl.Name AS [Name]
          ,prTbl.Description AS [Description]
          ,t.c.value('@RoleName','nvarchar(max)') as [Role]
    FROM @process AS prTbl
    CROSS APPLY prTbl.workflowXML.nodes('process') AS A(pr)
    CROSS APPLY pr.nodes('*') AS B(nd)
    -- Path is evaluated from the document root, not from the current node nd.
    CROSS APPLY prTbl.workflowXML.nodes('process/Task/TaskUsers/TaskUser') AS t(c)    


uses the wrong path in the last CROSS APPLY. You'll get all TaskUser elements of the document for every node, hence the many rows... You might change this to

-- Enumerate every direct child element of the process node as nd.
CROSS APPLY pr.nodes('*') AS B(nd)
-- Path is relative to nd, so only TaskUser elements below the current node are
-- returned; OUTER APPLY keeps nodes that have no TaskUser at all.
OUTER APPLY nd.nodes('TaskUsers/TaskUser') AS t(c)   

to collect all TaskUser-elements below the current node...


The whole approach of walking down your workflow breaks if you add rows to the original result set!

What you can try:

Add the information as an additional column and leave the rest untouched. In my query I do this twice (TaskUsers and RoleNames), as I do not really understand what you want to do with it:

-- Same workflow walk as the original query, but each node row carries its own
-- TaskUser information as extra columns, so the row count per node is unchanged.
-- NOTE(review): as shown, the parentheses around the CTE bodies, the UNION ALL
-- between the anchor and recursive members of recCTE, and the FROM ( of the final
-- derived table are not visible -- presumably lost when the post was copied.

-- DerivedTable: one row per direct child element of the <process> node.
WITH DerivedTable AS
    SELECT prTbl.ID AS tblID
          ,nd.value('local-name(.)','nvarchar(max)') AS NodeName
          ,nd.value('@type','nvarchar(max)') AS [Type]
          ,nd.value('@id','nvarchar(max)') AS Id
          ,nd.query('.') AS Task
          ,prTbl.Name AS [Name]
          ,prTbl.Description AS [Description]
          -- The TaskUser elements below the current node, kept as an XML fragment.
          ,nd.query('./TaskUsers/TaskUser') AS TaskUsers
          -- The RoleName attributes of those elements flattened into a single string.
          ,nd.query('./TaskUsers/TaskUser').query('for $rn in /TaskUser return string($rn/@RoleName)').value('.','nvarchar(max)') AS RoleNames
    FROM @process AS prTbl
    CROSS APPLY prTbl.workflowXML.nodes('process') AS A(pr)
    CROSS APPLY pr.nodes('*') AS B(nd)
-- AllIncoming: one row per <incoming> child of every Task node; [Target] is its text value.
-- NOTE(review): recCTE reads x.Id from this CTE, but no Id column is selected here as shown.
,AllIncoming AS
    SELECT tblId
          ,i.value('.','nvarchar(max)') AS [Target] 
    FROM DerivedTable
    CROSS APPLY Task.nodes('Task/incoming') AS A(i)

    WHERE NodeName='Task'
-- recCTE, anchor member: the nodes with Type 'start' begin each path at Step 1.
,recCTE AS
    SELECT tblID,NodeName,[Type],Id,[Name],[Description],TaskUsers,RoleNames,Task,1 AS Step,' | ' +CAST(Id AS NVARCHAR(MAX)) AS NodePath
    FROM DerivedTable 
    WHERE [Type]='start'


    -- recCTE, recursive member: step to every node whose <incoming> value matches
    -- one of the current node's <outgoing> values, appending its Id to NodePath.
    SELECT nxt.tblID,nxt.NodeName,nxt.[Type],nxt.Id,nxt.[Name],nxt.Description,nxt.TaskUsers,nxt.RoleNames,nxt.Task,r.Step+1,r.NodePath + ' | ' + nxt.Id
    FROM recCTE AS r
    INNER JOIN DerivedTable AS nxt ON nxt.Id IN(SELECT x.Id 
                                                FROM AllIncoming AS x 
                                                WHERE x.[Target] IN (SELECT o.value('.','nvarchar(max)')
                                                                     FROM r.Task.nodes('*/outgoing') AS A(o)
    -- Do not continue past a 'user' node, skip nodes already on the path, and cap the depth at 10 steps.
    WHERE r.[Type]<>'user' 
      AND r.NodePath NOT LIKE '%| ' + nxt.Id + '%' 
      AND r.Step<=10 
-- Final result: per process, keep only the 'user' rows whose Step does not exceed
-- the smallest Step of any 'user' Task in that process; any role filter belongs here.
-- NOTE(review): the inner SELECT shows only t.tblID although the outer query reads
-- more columns from a -- the select list looks truncated.
select a.tblID as ProcessID,[Name],[Description],TaskUsers,RoleNames,a.NodePath,a.Id as TaskID
    SELECT t.tblID 
    FROM recCTE AS t
    WHERE t.[Type]='user'
        AND t.Step<=ISNULL((SELECT MIN(x.Step) FROM recCTE AS x WHERE x.tblID=t.tblID AND x.[Type]='user' AND x.NodeName='Task'),10000)  
) a

The result looks like this

enter image description here

Whatever you want to do with this additional information, any filter you want to apply should be done as the last step...

But - to be honest - you've reached the limits of SQL-Server... This is not the right tool. Sooner or later there will be some more business logic to implement... If this has to be done within SQL Server you might think of a CLR-method...

