我在具有多个可选匹配项的查询中遇到 null 结果的问题。
# load package
library(rvest)
# Product page used as the scraping example
url <- "https://uk.burberry.com/fringed-wool-cashmere-patchwork-cardigan-coat-p40612561"

# Fetch the page and parse it into an HTML document
webpage <- read_html(url)

# --- ID --------------------------------------------------------------------
# Select the '.section' nodes, take their text and drop the "Item" label.
id_data_html <- webpage %>% html_nodes(".section")
id_data <- id_data_html %>%
  html_text() %>%
  gsub("Item", "", .)

# --- Name ------------------------------------------------------------------
# Select the '.type-h6' nodes, take their text and strip the newline-plus-tabs
# run that appears in the extracted text.
names_data_html <- webpage %>% html_nodes(".type-h6")
names_data <- names_data_html %>%
  html_text() %>%
  gsub("\n\t\t\t\t\t\t\t", "", .)

# --- Price -----------------------------------------------------------------
# Select the '.l2' nodes, take their text, then remove every tab and newline.
price_data_html <- webpage %>% html_nodes(".l2")
price_data <- price_data_html %>%
  html_text() %>%
  gsub("\t", "", .) %>%
  gsub("\n", "", .)

# --- Colour ----------------------------------------------------------------
# The colour is read from the element with id 'colour-picker-value'.
colour_data_html <- webpage %>% html_nodes("#colour-picker-value")
colour_data <- colour_data_html %>% html_text()

# --- Result ----------------------------------------------------------------
# Assemble the scraped fields into a single data frame, one column per field.
burberry_df <- data.frame(
  ID = id_data,
  Name = names_data,
  Price = price_data,
  Colour = colour_data
)
在这个查询中,w2、w3 或 w4 有时会返回 null,这是预期的行为;但当其中任何一个为 null 时,整个结果都会变成 null 或为空:
// Purpose: starting from one W node (w1), build three lists of other W nodes
// (match1, match2, match3) using progressively different matching criteria on
// the G nodes they are connected to through REL relationships.

// Stage 1: locate the searched W node by name and the G nodes it points to via REL.
// gsCol  = distinct G nodes of w1
// gCount = number of REL relationships attached to w1 (pattern is undirected here)
MATCH (w1:W {name: "****"})-[:REL]->(gs:G)
WITH w1, COLLECT(DISTINCT gs) AS gsCol, SIZE((w1)-[:REL]-()) AS gCount
// Stage 2: from w1's REL neighbours, follow zero or one SIMILAR hop to a G node.
// similarGs = distinct G nodes reached this way (the variable gs is re-bound here).
OPTIONAL MATCH (w1)-[:REL]-()-[:SIMILAR*0..1]->(gs:G)
WITH w1, gsCol, gCount, COLLECT(DISTINCT gs) AS similarGs
// Stage 3: candidate W nodes (ws) reached from w1 through a G node (g4) and a
// variable-length REL|SIMILAR path of length 0..1. Candidates are kept only when the
// last relationship's amount lies strictly within +/-10 of c2a.amount.
// NOTE(review): for a zero-length path c2b is empty, so last(c2b) would be null —
// confirm how that case is meant to be handled.
OPTIONAL MATCH (w1)-[c2a:REL]->(g4:G)-[c2b:REL|:SIMILAR*0..1]-(ws:W)
WHERE c2a.amount - 10 < last(c2b).amount < c2a.amount + 10
// The same distinct candidate list is collected three times, once per later stage.
WITH w1, gsCol, similarGs, gCount, COLLECT(DISTINCT ws) AS ws2, COLLECT(DISTINCT ws) AS ws3, COLLECT(DISTINCT ws) AS ws4
// Stage 4 (match1): for each candidate w2, gather its G nodes (gs3), its outgoing REL
// count (gCount3) and the sum of its REL amounts (c3amount). Keep w2 only when every
// one of its G nodes is in gsCol, its count equals gCount and its amounts sum to 100.
// NOTE(review): UNWIND over an empty list emits no rows in Cypher; if ws2 is empty
// nothing reaches the later stages — confirm whether that explains the empty result.
UNWIND ws2 as w2
OPTIONAL MATCH (w2)-[c3:REL]->(g3:G)
WITH w1, w2, ws3, ws4, gsCol, similarGs, gCount, COLLECT(g3) AS gs3, SIZE((w2)-[:REL]->()) as gCount3, SUM(c3.amount) AS c3amount
WHERE ALL(x in gs3 WHERE x IN gsCol)
WITH w1, w2, ws3, ws4, gsCol, similarGs, gCount, gCount3, c3amount
WHERE gCount3 = gCount AND c3amount = 100
// ws2Col = all w2 that passed the stage-4 filters.
WITH COLLECT(w2) AS ws2Col, w1, ws3, ws4, gsCol, similarGs, gCount
// Stage 5 (match2): same shape as stage 4, but each candidate w3's G nodes (gs4) are
// checked against similarGs, and candidates already in ws2Col are excluded.
UNWIND ws3 as w3
WITH w1, w3, ws4, gsCol, similarGs, gCount, ws2Col
OPTIONAL MATCH (w3)-[c4:REL]->(g4:G)
WITH w1, w3, ws4, ws2Col, gsCol, similarGs, gCount, COLLECT(g4) AS gs4, SIZE((w3)-[:REL]->()) AS gCount4, SUM(c4.amount) AS c4amount
WHERE ALL(x in gs4 WHERE x in similarGs)
// NOTE(review): the WITH below does not project c4amount, yet the following WHERE
// references it — confirm the variable is still in scope at that point.
WITH w1, w3, ws4, ws2Col, gsCol, similarGs, gCount, gs4, gCount4
WHERE gCount4 = gCount AND c4amount = 100 AND NOT(w3 IN ws2Col)
// ws3Col = w3 values that passed stage 5.
// NOTE(review): w3 is also kept as a grouping key next to COLLECT(w3), which
// presumably yields one single-element ws3Col per w3 — confirm this is intended.
WITH COLLECT(w3) AS ws3Col, w1, w3, ws4, ws2Col, gsCol, gCount, similarGs
// Stage 6 (match3): for each candidate w4 compute the sum (c6amount) and max (c6max)
// of its REL amounts, its REL count (gCount5) and its G nodes (gs5). Keep w4 when all
// of its G nodes are in gsCol, the summed amount exceeds a threshold that depends on
// gCount (> 25 when gCount > 2, otherwise > 65), and w4 is in neither ws2Col nor ws3Col.
UNWIND ws4 AS w4
OPTIONAL MATCH (w4)-[c5b:REL]->(g5:G)
WITH w1, w4, ws2Col, ws3Col, gsCol, similarGs, gCount, sum(c5b.amount) AS c6amount, SIZE((w4)-[:REL]-()) as gCount5, collect(g5) AS gs5, max(c5b.amount) as c6max
WHERE ALL(x IN gs5 WHERE x IN gsCol) AND (CASE WHEN gCount > 2 THEN c6amount > 25 ELSE c6amount > 65 END) AND NOT(w4 in ws2Col) AND NOT(w4 in ws3Col)
// ws4Col = w4 values that passed stage 6 (w4 and the per-w4 aggregates are grouping keys here).
WITH COLLECT(w4) AS ws4Col, w1, ws2Col, ws3Col, w4, gsCol, similarGs, gCount, c6amount, gCount5, gs5, c6max
// Final stage: flatten the three collected lists and return each as a distinct list.
// NOTE(review): the three UNWINDs are chained, so if any one list is empty no rows
// survive and all three outputs would come back empty — confirm against the
// observed all-empty result.
UNWIND ws2Col AS ws2a UNWIND ws3Col AS ws3a UNWIND ws4Col AS ws4a
RETURN collect(distinct ws2a) AS match1, collect(distinct ws3a) AS match2, collect(distinct ws4a) AS match3
如果match2为null,我希望在match1和/或match3中看到一些结果。
我尝试在没有collect(w2),collect(w3)和collect(w4)的情况下运行查询,但这只会导致查询超时或耗尽堆大小。
有人能建议一种方法,避免某个可选匹配返回 null 时清空查询中的所有结果,或导致其他可选匹配也返回 null 吗?
编辑1 -
已找到查询出问题的位置:是在第二个可选匹配处……
╒════════╤════════╤════════╕
│"match1"│"match2"│"match3"│
╞════════╪════════╪════════╡
│[] │[] │[] │
└────────┴────────┴────────┘
即使我在此时运行返回,如果w3为null,ws2Col也会返回null
编辑2 -
@BrunoPeres答案几乎就在那里,迈出了一大步,越来越近了。必须将第二个和第三个AND NOT(w3 IN ws2Col)
更改为COLLECT
以使查询不丢弃这些集合,如果其中一个集合为空。以下是可能遇到此问题的最终查询。
FILTER
答案 0 :(得分:1)
根据文档“The IN operator and null”,当您测试 null 是否 IN 某个给定列表时,返回值为 null:
因此,以下表达式的返回值为null:
RETURN null IN [1, 2, 3]
╒═══════════════════╕
│"null IN [1, 2, 3]"│
╞═══════════════════╡
│null │
└───────────────────┘
因此,表达式 NOT(null IN [1, 2, 3]) 的返回值也将为 null。
我认为您可以修改查询,将测试更改为:
AND NOT(w3 IS NULL OR w3 IN ws2Col)
即:当 w3 为 null 时,它不被视为列表中的元素。