我正在尝试构建 nom 解析器来检查ID为UUID的网址
rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912
我创建了以下内容:
extern crate uuid;
use uuid::Uuid;
// Parses the literal "rooms", optionally followed by "/" and a 36-character ID
// that is converted with `FromStr`, and yields the ID as `Option<Uuid>`.
//
// NOTE: because the whole "/<id>" suffix is wrapped in `opt!`, a failure to
// parse the ID is reported as `None` instead of as an error (for example,
// "rooms/123" ends up as `Ok(None)`).
named!(room_uuid<&str, Option<Uuid>>,
do_parse!(
tag_s!("rooms") >>
// Optional "/<id>" suffix.
id: opt!(complete!(preceded!(
tag_s!("/"),
// Take 36 characters and parse them into the output type.
map_res!(take_s!(36), FromStr::from_str)
))) >>
(id)
)
);
几乎可以处理所有情况:
// Bare prefix: everything is consumed and there is no ID.
assert_eq!(room_uuid("rooms"), Done("", None));
// Trailing slash without an ID: the "/" is left unparsed.
assert_eq!(room_uuid("rooms/"), Done("/", None));
// A valid UUID after the slash is returned as `Some(..)`.
assert_eq!(room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"), Done("", Some(Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap())));
除了ID不是有效UUID的情况外:
assert!(room_uuid("rooms/123").is_err()); # it fails
# room_uuid("rooms/123").to_result() => Ok(None)
据我所知,这是因为opt!
将内部Err
转换为None
。
我想将ID作为可选部分,但如果它存在则应该是有效的UUID 不幸的是,我不明白如何将两者结合起来:选择性和严格格式。
答案 0 :(得分:2)
鉴于解析URL不需要流式接口,您可以改用synom
。它曾作为syn
crate的一部分进行维护,但遗憾的是现已不再维护(它已被合并进syn
,并改为仅处理Rust的词法标记)。
可悲的是,synom
未提供take_s!
和eof!
(后者将禁止尾随“未解析的”123
),但它很容易实现那些。
使用eof!
也意味着您无法返回未解析的"/"
(虽然我认为这是一件好事);并且嵌套的option!
最后需要一些解包(您可以返回Option<Option<Uuid>>
来检测尾随"/"
)。
#[macro_use]
extern crate synom;
extern crate uuid;
use uuid::Uuid;
/// Takes exactly `$length` characters (not bytes) from the front of the string `$i`.
///
/// Evaluates to `synom::IResult::Done(rest, taken)` when the input holds at
/// least `$length` characters, and to `synom::IResult::Error` otherwise.
/// A length of zero always succeeds with an empty match.
macro_rules! take_s {
    ($i:expr, $length:expr) => {{
        let input = $i;
        let wanted: usize = $length;
        // Byte offsets of every character start, followed by the end-of-input
        // offset; entry number `wanted` is where the split has to happen.
        let cut = input
            .char_indices()
            .map(|(offset, _)| offset)
            .chain(::std::iter::once(input.len()))
            .nth(wanted);
        match cut {
            Some(offset) => {
                let (taken, rest) = input.split_at(offset);
                synom::IResult::Done(rest, taken)
            }
            // Fewer than `wanted` characters are available.
            None => synom::IResult::Error,
        }
    }};
}
/// Succeeds with `()` only when the input `$i` is empty; any leftover input
/// makes it evaluate to `synom::IResult::Error`.
///
/// The pattern keeps the trailing comma after `$i`, exactly as before.
macro_rules! eof {
    ($i:expr,) => {{
        let rest = $i;
        if !rest.is_empty() {
            synom::IResult::Error
        } else {
            synom::IResult::Done(rest, ())
        }
    }};
}
// Parses "rooms", an optional "/" and an optional 36-character UUID, and
// requires that nothing is left over afterwards.
named!(room_uuid -> Option<Uuid>,
do_parse!(
tag!("rooms") >>
// Outer `option!`: the "/" may be missing altogether.
id: option!(preceded!(
tag!("/"),
// Inner `option!`: the slash may be present without an ID after it.
option!(
// Only a successful `str::parse` result is accepted; there is no `Err` arm.
switch!(map!(take_s!(36), str::parse),
Ok(v) => value!(v)
)
)
)) >>
// Reject any input that has not been consumed by this point.
eof!() >>
// Collapse the nested Option<Option<Uuid>> into a single Option<Uuid>.
(id.unwrap_or(None))
)
);
fn main() {
use synom::IResult::*;
assert_eq!(room_uuid("rooms"), Done("", None));
assert_eq!(room_uuid("rooms/"), Done("", None));
assert_eq!(
room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
Done(
"",
Some(Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap())
)
);
assert_eq!(room_uuid("rooms/123"), Error);
}
答案 1 :(得分:1)
我也是最近几周才开始使用nom,但我找到了一种解决这个问题的方法。它并非完全用宏写成,但在做了一处改动后确实能给出正确的行为:当没有给出UUID时,我会吞掉/
,而不是让它悬空。
#[macro_use]
extern crate nom;
extern crate uuid;
use std::str::FromStr;
use nom::IResult;
use uuid::Uuid;
fn room_uuid(input: &str) -> IResult<&str, Option<Uuid>> {
// Check that it starts with "rooms"
let res = tag_s!(input, "rooms");
let remaining = match res {
IResult::Incomplete(i) => return IResult::Incomplete(i),
IResult::Error(e) => return IResult::Error(e),
IResult::Done(i, _) => i
};
// If a slash is not present, return early
let optional_slash = opt!(remaining, tag_s!("/"));
let remaining = match optional_slash {
IResult::Error(_) |
IResult::Incomplete(_) => return IResult::Done(remaining, None),
IResult::Done(i, _) => i
};
// If something follows a slash, make sure
// it's a valid UUID
if remaining.len() > 0 {
let res = complete!(remaining, map_res!(take_s!(36), FromStr::from_str));
match res {
IResult::Done(i, o) => IResult::Done(i, Some(o)),
IResult::Error(e) => IResult::Error(e),
IResult::Incomplete(n) => IResult::Incomplete(n)
}
} else {
// This branch allows for "rooms/"
IResult::Done(remaining, None)
}
}
/// Covers the bare prefix, the trailing slash, a valid UUID and an invalid ID.
#[test]
fn match_room_plus_uuid() {
    use nom::IResult::Done;

    // The ID that the fully-specified URL is expected to yield.
    let expected = Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap();

    assert_eq!(room_uuid("rooms"), Done("", None));
    assert_eq!(room_uuid("rooms/"), Done("", None));
    assert_eq!(
        room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
        Done("", Some(expected))
    );

    // Something follows the slash but it is not a UUID.
    assert!(room_uuid("rooms/123").is_err());
}
答案 2 :(得分:1)
好的,所以我使用了nom
和扩展的网址格式api/v1/rooms/UUID/tracks/UUID
。
基础知识与以前相同:您要检查eof
,忽略尾随"/"
,永远不要等待不完整的结果(alt_complete!
在这里做得很好)。
关于您的ErrorKind::Verify
愿望:我认为错误类型实际上并不重要,直接忽略它即可,或者手动将其映射为您想要的任何类型。
小心alt_complete!
分支:如果出现重叠,首选的选项(通常是“较长的那个”)应该排在前面。
我喜欢我的with!
帮助器,但您也可以内联它。
游乐场不支持nom
,所以这次没有链接。
#[macro_use]
extern crate nom;
extern crate uuid;
use uuid::Uuid;
// Parses one "/<uuid>" path segment: a literal "/" followed by 36 characters
// that are converted to a `Uuid` with `str::parse`.
named!(uuid<&str, Uuid>, preceded!(
tag_s!("/"),
map_res!(take_s!(36), str::parse)
));
/// The request shapes recognised by the URL parser.
#[derive(Clone, PartialEq, Eq, Debug)]
enum ApiRequest {
/// `/rooms`
Rooms,
/// `/rooms/UUID`
Room { room: Uuid },
/// `/rooms/UUID/tracks`
Tracks { room: Uuid },
/// `/rooms/UUID/tracks/UUID`
Track { room: Uuid, track: Uuid },
}
/// Shortcut for: `do_parse!(name: expr >> r: otherexpr >> (r))`
///
/// `otherexpr` should use `name`, otherwise you could just use `preceded!`.
///
/// The first rule accepts a macro parser (`name: parser!(..) >> rest`), the
/// second a named parser (`name: parser >> rest`).
///
/// The expansions carry no trailing `;`: the macro is used in expression
/// position, where a trailing semicolon is rejected by the
/// `semicolon_in_expressions_from_macros` lint.
macro_rules! with {
    ($i:expr, $var:ident: $submac:ident!( $($args:tt)* ) >> $($rest:tt)*) => {
        do_parse!($i, $var: $submac!($($args)*) >> r: $($rest)* >> (r))
    };
    ($i:expr, $var:ident: $submac:ident >> $($rest:tt)*) => {
        do_parse!($i, $var: $submac >> r: $($rest)* >> (r))
    };
}
// Parser for the v1 API: /api/v1/rooms/UUID/tracks/UUID
//
// The alternatives are nested, and at every level the longer match is listed
// before the `value!` fallback for the shorter URL.
named!(apiv1<&str, ApiRequest>, preceded!(tag_s!("/api/v1"),
alt_complete!(
preceded!(tag_s!("/rooms"), alt_complete!(
// "/rooms/UUID..." - `room` is bound for the nested alternatives.
with!(room: uuid >> alt_complete!(
preceded!(tag_s!("/tracks"), alt_complete!(
// "/rooms/UUID/tracks/UUID..." - `track` is bound as well.
with!(track: uuid >> alt_complete!(
// ... sub track requests?
value!(ApiRequest::Track{room, track})
))
|
// "/rooms/UUID/tracks" with no track ID.
value!(ApiRequest::Tracks{room})
))
// other room requests
|
// "/rooms/UUID" with nothing recognised after it.
value!(ApiRequest::Room{room})
))
|
// "/rooms" with no room ID.
value!(ApiRequest::Rooms)
))
// | ... other requests
)
));
// Top-level parser: runs a versioned API parser, then allows one optional
// trailing "/" and requires the end of the input.
named!(api<&str, ApiRequest>, terminated!(
alt_complete!(
apiv1
// | ... other versions
// also could wrap in new enum like:
// apiv1 => { ApiRequest::V1 }
// |
// apiv2 => { ApiRequest::V2 }
),
tuple!(
alt_complete!(tag_s!("/") | value!("")), // ignore trailing "/"
eof!() // make sure full URL was parsed
)
));
// Exercises `api` with every supported URL shape (with and without a trailing
// "/") and with a set of inputs that must be rejected.
fn main() {
use nom::IResult::*;
use nom::ErrorKind;
// The IDs embedded in the URLs below.
let room = Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap();
let track = Uuid::parse_str("83d235e8-03cd-420d-a8c6-6e42440a5573").unwrap();
// Room list.
assert_eq!(api("/api/v1/rooms"), Done("", ApiRequest::Rooms));
assert_eq!(api("/api/v1/rooms/"), Done("", ApiRequest::Rooms));
// Single room.
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
Done("", ApiRequest::Room { room })
);
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/"),
Done("", ApiRequest::Room { room })
);
// Track list of a room.
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks"),
Done("", ApiRequest::Tracks { room })
);
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/"),
Done("", ApiRequest::Tracks { room })
);
// Single track of a room.
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573"),
Done("", ApiRequest::Track{room, track})
);
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573/"),
Done("", ApiRequest::Track{room, track})
);
// No alternative matches at all.
assert_eq!(api("/api/v1"), Error(ErrorKind::Alt));
assert_eq!(api("/api/v1/foo"), Error(ErrorKind::Alt));
// A prefix matches, but unparsed input is left over.
assert_eq!(api("/api/v1/rooms/123"), Error(ErrorKind::Eof));
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/bar"),
Error(ErrorKind::Eof)
);
assert_eq!(
api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573/123"),
Error(ErrorKind::Eof)
);
// Unknown API version.
assert_eq!(api("/api/v2"), Error(ErrorKind::Alt));
}
您还可以使用更严格的alt_full_opt_slash!
分支方法,这样可以确保分支仅在完全解析输入时才匹配。
然后你可以用更“扁平”(flat)的方式(尽管嵌套分支应该仍然可用)来解析各个备选分支(不过这意味着某些UUID最终可能会被解析不止一次;并且现在所有错误都是Alt
种类):
/// Similar to alt_complete, but also requires the branch parses until
/// the end of the input (but ignores a trailing "/").
///
/// Implemented as a token muncher: the public rule (last) appends a `__end`
/// sentinel, the `__impl` rules peel off one `|`-separated branch at a time,
/// `__impl_push` wraps that branch, and `__impl_push2` appends it to the
/// accumulated branch list, which is finally handed to `alt_complete!`.
macro_rules! alt_full_opt_slash {
// Append the first wrapped branch: nothing accumulated yet, so no `|` is needed.
(__impl_push2 ($i:expr,) ($($new:tt)*), $($rest:tt)*) => {
alt_full_opt_slash!(__impl ($i, $($new)*), $($rest)*)
};
// Append a later wrapped branch, separated from the previous ones by `|`.
(__impl_push2 ($i:expr, $($result:tt)+) ($($new:tt)*), $($rest:tt)*) => {
alt_full_opt_slash!(__impl ($i, $($result)+ | $($new)*), $($rest)*)
};
// Wrap one branch so that it must be followed by an optional "/" and the end of input.
// NOTE(review): for the `parser => { gen }` forms below, the `=> { gen }` part
// ends up inside the first argument of `terminated!` - confirm that this expands
// as intended.
(__impl_push ($($result:tt)*) ($($new:tt)*), $($rest:tt)*) => {
// modify branch:
alt_full_opt_slash!(__impl_push2 ($($result)*) (
terminated!(
$($new)*,
tuple!(
alt_complete!(tag_s!("/") | value!("")), // ignore trailing "/"
eof!() // make sure full URL was parsed
)
)
), $($rest)*)
};
// Next branch is a named parser: `ident | ...`
(__impl ($($result:tt)*), $e:ident | $($rest:tt)*) => {
alt_full_opt_slash!(__impl_push ($($result)*) ( $e ), $($rest)*)
};
// Next branch is a macro parser: `parser!(...) | ...`
(__impl ($($result:tt)*), $subrule:ident!( $($args:tt)*) | $($rest:tt)*) => {
alt_full_opt_slash!(__impl_push ($($result)*) ( $subrule!($($args)*) ), $($rest)*)
};
// Next branch is a macro parser with a mapping: `parser!(...) => { gen } | ...`
(__impl ($($result:tt)*), $subrule:ident!( $($args:tt)* ) => { $gen:expr } | $($rest:tt)*) => {
alt_full_opt_slash!(__impl_push ($($result)*) ( $subrule!($($args)*) => { $gen } ), $($rest)*)
};
// Next branch is a named parser with a mapping: `ident => { gen } | ...`
(__impl ($($result:tt)*), $e:ident => { $gen:expr } | $($rest:tt)*) => {
alt_full_opt_slash!(__impl_push ($($result)*) ( $e => { $gen } ), $($rest)*)
};
// Sentinel reached: emit the collected branches as a single `alt_complete!`.
(__impl ($i:expr, $($result:tt)*), __end) => {
alt_complete!($i, $($result)*)
};
// Public entry point: start with an empty branch list and append the `__end` sentinel.
($i:expr, $($rest:tt)*) => {{
alt_full_opt_slash!(__impl ($i, ), $($rest)* | __end)
}};
}
// Flat variant of the v1 parser: /api/v1/rooms/UUID/tracks/UUID
//
// Every URL shape is spelled out as its own branch of `alt_full_opt_slash!`,
// so the shared prefix is parsed again by each branch that is tried.
named!(apiv1<&str, ApiRequest>, preceded!(tag_s!("/api/v1"),
alt_full_opt_slash!(
// /rooms
do_parse!(
tag_s!("/rooms") >>
(ApiRequest::Rooms)
)
|
// /rooms/UUID
do_parse!(
tag_s!("/rooms") >>
room: uuid >>
(ApiRequest::Room{room})
)
|
// /rooms/UUID/tracks
do_parse!(
tag_s!("/rooms") >>
room: uuid >>
tag_s!("/tracks") >>
(ApiRequest::Tracks{room})
)
|
// /rooms/UUID/tracks/UUID
do_parse!(
tag_s!("/rooms") >>
room: uuid >>
tag_s!("/tracks") >>
track: uuid >>
(ApiRequest::Track{room, track})
)
)
));
// Top-level parser for the flat variant: it only selects the API version and
// adds no trailing-slash or end-of-input handling of its own.
named!(api<&str, ApiRequest>, alt_complete!(
apiv1
// | ... other versions
// also could wrap in new enum like:
// apiv1 => { ApiRequest::V1 }
// |
// apiv2 => { ApiRequest::V2 }
));