我正在努力用haskell的所有孩子删除一个Element。 任务是从给定的xml文档中剥离所有table-tags(也许我还没有理解游标的概念,或者它是我缺少的其他东西)。
我尝试了三种不同的方法:
xml-conduit
xml-lens
ghc-8.0.1
test.xml
)/输出 INPUT EXPECTED OUTPUT (for the filtered cases)
<?xml version="1.0"?> | <?xml version="1.0"?>
<root> | <root>
<a> | <a>
... | ...
</a> | </a>
<b> | <b>
<table> | <bb>
<!--table entries--> | ...
</table> | </bb>
<bb> | </b>
... | <c>
</bb> | <cc>
</b> | ...
<c> | </cc>
<cc> | </c>
... | </root>
</cc>
</c>
</root>
{-# LANGUAGE OverloadedStrings #-}
module Minimal where
import Control.Lens
import Data.Conduit.Text as CT
import Data.Default
import qualified Data.Text.Lazy.IO as TIO
import Text.XML
import Text.XML.Cursor
import qualified Text.XML.Lens as L
import Data.Maybe (isNothing, isJust)
main :: IO ()
main = do test <- Text.XML.readFile def "./test.xml"
pput $ filterDocument test
let cursor = fromDocument test
pput $ docUp $ elemUp $ getRoot ((head $ cursor $// checkName (== "table")) {child = []} )
pput $ docUp $ elemUp $ filterChildren (checkName (/= "table")) cursor
return ()
filterChildren :: Axis -> Cursor -> Cursor
filterChildren pred c = c {child = map (filterChildren pred) (c $/ pred) }
filterDocument :: Document -> Document
filterDocument doc = doc & (L.root.L.entire.filtered (\e -> isJust $ e^?L.named "table") .~ emptyElemt)
where emptyElemt = Element "empty" mempty []
-- helper functions --
docUp :: Element -> Document
docUp e = Document {documentRoot = e, documentPrologue = Prologue [] Nothing [], documentEpilogue = [] }
elemUp :: Cursor -> Element
elemUp cursor = Element {elementName = "DOC", elementAttributes = mempty , elementNodes = [node cursor]}
elemUp' :: [Cursor] -> Element
elemUp' cursors = Element {elementName = "DOC", elementAttributes = mempty , elementNodes = map node cursors}
getRoot :: Cursor -> Cursor
getRoot c = let p = (c $| parent)
in if null p then c else getRoot $ head p
pput :: Document -> IO ()
pput = TIO.putStrLn . renderText pretty
where pretty = def {rsPretty = True}
> stack ghci
. . .
Ok, modules loaded: Minimal.
λ > main
<?xml version="1.0" encoding="UTF-8"?>
<root>
<a>
...
</a>
<b>
<empty>
<!-- table entries -->
</empty>
<bb>
...
</bb>
</b>
<c>
<cc>
...
</cc>
</c>
</root>
<?xml version="1.0" encoding="UTF-8"?>
<DOC>
<root>
<a>
...
</a>
<b>
<table>
<!-- table entries -->
</table>
<bb>
...
</bb>
</b>
<c>
<cc>
...
</cc>
</c>
</root>
</DOC>
<?xml version="1.0" encoding="UTF-8"?>
<DOC>
<root>
<a>
...
</a>
<b>
<table>
<!-- table entries -->
</table>
<bb>
...
</bb>
</b>
<c>
<cc>
...
</cc>
</c>
</root>
</DOC>
答案 0 :(得分:1)
我不知道Text.XML
,但这是Text.XML.Light
的解决方案:
module Minimal where
import Data.Maybe(catMaybes)
import Text.XML.Light.Input
import Text.XML.Light.Output
import Text.XML.Light.Types
main :: IO ()
main = do
test <- parseXML <$> readFile "./test.xml"
mapM_ (putStrLn . ppContent) . catMaybes $ map cutTables test
cutTables :: Content -> Maybe Content
cutTables (Elem e) = if qName (elName e) == "table" then Nothing else
Just . Elem $ e { elContent = catMaybes . map cutTables $ elContent e }
cutTables x = Just x
答案 1 :(得分:1)
此代码似乎可以根据xml-conduit进行操作。我从yesod网络书example开始,通过简单的递归函数实现转换。
{-# LANGUAGE OverloadedStrings #-}
import qualified Data.Map as M
import Prelude hiding (readFile, writeFile)
import Text.XML
main :: IO ()
main = do
Document prologue root epilogue <- readFile def "test.xml"
let root' = transform root
writeFile def
{ rsPretty = True
} "output.html" $ Document prologue root' epilogue
transform :: Element -> Element
transform (Element _name attrs children) =
Element _name attrs (filterChildren children)
filterChildren :: [Node] -> [Node]
filterChildren = concatMap kickTable
where
kickTable :: Node -> [Node]
kickTable (NodeElement (Element "table" attrs children)) = -- Drop it
[ ]
kickTable (NodeElement (Element n attrs children)) = -- Recurse on
[ NodeElement (Element n attrs (filterChildren children)) ]
kickTable n = -- ok - whatever
[ n ]
我的镜头foo不足以说明为什么你的解决方案不起作用,但是来自文档 - 你必须小心filtered
不要违反遍历法,尽管我不知道是什么当你违反它们时就会发生。
希望有所帮助。