具有严格格式的可选字段

时间:2018-02-13 20:01:42

标签: rust nom

我正在尝试构建 nom 解析器来检查ID为UUID的网址

rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912

我创建了以下内容:

extern crate uuid;
use uuid::Uuid;

// Parses "rooms" optionally followed by "/" and a 36-character UUID.
// NOTE(review): the whole "/<uuid>" group sits inside `opt!`, so a malformed
// ID such as "rooms/123" produces `None` instead of an error.
named!(room_uuid<&str, Option<Uuid>>,
    do_parse!(
        tag_s!("rooms") >>
        // Optional "/<uuid>" suffix; the 36 taken characters are converted
        // with `FromStr::from_str`.
        id: opt!(complete!(preceded!(
            tag_s!("/"),
            map_res!(take_s!(36), FromStr::from_str)
        ))) >>

        (id)
    )
);

几乎可以处理所有情况:

// A bare "rooms" yields no ID.
assert_eq!(room_uuid("rooms"), Done("", None));
// With a trailing slash, the "/" stays in the remaining input.
assert_eq!(room_uuid("rooms/"), Done("/", None));
// A well-formed UUID is parsed and returned.
assert_eq!(room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"), Done("", Some(Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap())));

除了ID不是有效UUID的情况外:

assert!(room_uuid("rooms/123").is_err()); # it fails
# room_uuid("rooms/123").to_result() => Ok(None)

据我所知,这是因为opt!将内部Err转换为None

我想将ID作为可选部分,但如果它存在,则必须是有效的UUID。不幸的是,我不明白如何将两者结合起来:可选性和严格的格式。

3 个答案:

答案 0 :(得分:2)

鉴于解析URL不需要流式接口,您可以改用synom。它曾作为syn crate的一部分进行维护,但遗憾的是现在已不再维护(它已被merged into syn,并改为只处理Rust的token)。

可惜的是,synom没有提供take_s!和eof!(后者用于禁止末尾残留“未解析”的123),不过它们很容易实现。

使用eof!也意味着您无法返回未解析的"/"(虽然我认为这是一件好事);并且嵌套的option!最后需要一些解包(您可以返回Option<Option<Uuid>>来检测尾随"/")。

Playground

#[macro_use]
extern crate synom;

extern crate uuid;
use uuid::Uuid;

/// Takes the first `$length` characters (not bytes) of the `&str` input `$i`.
///
/// Expands to `synom::IResult::Done(rest, taken)` when the input holds at
/// least `$length` characters and to `synom::IResult::Error` otherwise. A
/// length of zero always succeeds with an empty match.
///
/// Each argument is evaluated exactly once, so passing an expression with
/// side effects is safe.
macro_rules! take_s {
    ($i:expr, $length:expr) => {{
        let length: usize = $length;
        let input: &str = $i;
        // Candidate split points: the byte offset of every character start,
        // followed by the end of the input. The `length`-th candidate is the
        // offset just past the first `length` characters, if there are enough.
        let split = input
            .char_indices()
            .map(|(pos, _)| pos)
            .chain(::std::iter::once(input.len()))
            .nth(length);
        match split {
            Some(pos) => {
                let (value, rem) = input.split_at(pos);
                synom::IResult::Done(rem, value)
            }
            None => synom::IResult::Error,
        }
    }};
}

/// Succeeds with `()` only when the input `$i` is empty; otherwise expands to
/// `synom::IResult::Error`.
///
/// The trailing comma in the pattern is required: the macro is invoked as
/// `eof!()` inside a parser, with the input passed in front of the (empty)
/// argument list. The input expression is evaluated exactly once.
macro_rules! eof {
    ($i:expr,) => {{
        let input = $i;
        if input.is_empty() {
            synom::IResult::Done(input, ())
        } else {
            synom::IResult::Error
        }
    }};
}

// Parses "rooms", an optional "/" and an optional UUID, then requires the end
// of the input.
named!(room_uuid -> Option<Uuid>,
    do_parse!(
        tag!("rooms") >>
        // Outer `option!`: the "/" separator may be absent.
        id: option!(preceded!(
            tag!("/"),
            // Inner `option!`: the UUID after "/" may be absent as well.
            option!(
                // Only an `Ok` result of parsing the 36 characters is accepted.
                switch!(map!(take_s!(36), str::parse),
                    Ok(v) => value!(v)
                )
            )
        )) >>
        // Rejects any unparsed remainder, e.g. the "123" in "rooms/123".
        eof!() >>

        // Collapse the nested Option<Option<Uuid>> into Option<Uuid>.
        (id.unwrap_or(None))
    )
);

fn main() {
    use synom::IResult::*;

    assert_eq!(room_uuid("rooms"), Done("", None));
    assert_eq!(room_uuid("rooms/"), Done("", None));
    assert_eq!(
        room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
        Done(
            "",
            Some(Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap())
        )
    );
    assert_eq!(room_uuid("rooms/123"), Error);
}

答案 1 :(得分:1)

我也是最近几周才开始使用nom,不过我找到了一种解决这个问题的方法。它并没有完全使用宏,但只做了一处改动就得到了正确的行为:当/后面没有给出UUID时,我会吞掉这个/,而不是把它留在未解析的输入里。

#[macro_use]
extern crate nom;
extern crate uuid;

use std::str::FromStr;
use nom::IResult;
use uuid::Uuid;

fn room_uuid(input: &str) -> IResult<&str, Option<Uuid>> {
    // Check that it starts with "rooms"
    let res = tag_s!(input, "rooms");
    let remaining = match res {
        IResult::Incomplete(i) => return IResult::Incomplete(i),
        IResult::Error(e) => return IResult::Error(e),
        IResult::Done(i, _) => i
    };

    // If a slash is not present, return early
    let optional_slash = opt!(remaining, tag_s!("/"));
    let remaining = match optional_slash {
        IResult::Error(_) |
        IResult::Incomplete(_) => return IResult::Done(remaining, None),
        IResult::Done(i, _) => i
    };

    // If something follows a slash, make sure
    // it's a valid UUID
    if remaining.len() > 0 {
        let res = complete!(remaining, map_res!(take_s!(36), FromStr::from_str));
        match res {
            IResult::Done(i, o) => IResult::Done(i, Some(o)),
            IResult::Error(e) => IResult::Error(e),
            IResult::Incomplete(n) => IResult::Incomplete(n)
        }
    } else {
        // This branch allows for "rooms/"
        IResult::Done(remaining, None)
    }
}

/// Covers the missing-ID, trailing-slash, valid-UUID and invalid-ID inputs.
#[test]
fn match_room_plus_uuid() {
    use nom::IResult::Done;

    let expected = Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap();

    // No ID, with or without the trailing slash.
    assert_eq!(room_uuid("rooms"), Done("", None));
    assert_eq!(room_uuid("rooms/"), Done("", None));

    // A well-formed UUID is returned.
    assert_eq!(
        room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
        Done("", Some(expected))
    );

    // A malformed ID is an error.
    assert!(room_uuid("rooms/123").is_err());
}

答案 2 :(得分:1)

好的,所以我使用了nom和扩展的网址格式api/v1/rooms/UUID/tracks/UUID

基础知识与以前相同:您要检查eof,忽略尾随的"/",并且永远不要等待不完整的结果(alt_complete!在这里做得很好)。

关于您希望得到ErrorKind::Verify这一点:我认为错误类型实际上并不重要,直接忽略它,或者手动将它映射成您想要的任何类型即可。

小心alt_complete!的分支:如果分支之间存在重叠,首选的选项(通常是“较长的那个”)应该放在前面。

我喜欢我的with!帮助器,但您也可以内联它。

游乐场不支持nom,所以这次没有链接。

#[macro_use]
extern crate nom;

extern crate uuid;
use uuid::Uuid;

// Parses one "/<uuid>" path segment: a "/" followed by 36 characters that
// must parse as a `Uuid`.
named!(uuid<&str, Uuid>, preceded!(
    tag_s!("/"),
    map_res!(take_s!(36), str::parse)
));

/// The request shapes recognised under `/api/v1`.
#[derive(Clone, PartialEq, Eq, Debug)]
enum ApiRequest {
    /// `/rooms`
    Rooms,
    /// `/rooms/UUID`
    Room { room: Uuid },
    /// `/rooms/UUID/tracks`
    Tracks { room: Uuid },
    /// `/rooms/UUID/tracks/UUID`
    Track { room: Uuid, track: Uuid },
}

/// shortcut for: `do_parse!(name: expr >> r: otherexpr >> (r))`
///
/// `otherexpr` should use `name`, otherwise you could just use `preceded!`.
///
/// The first rule accepts a sub-macro call (`name: submac!(..) >> ..`), the
/// second a plain parser identifier (`name: parser >> ..`).
///
/// The expansions carry no trailing `;`: the macro is used in expression
/// position (as an `alt_complete!` branch), where a trailing semicolon in the
/// macro body triggers the `semicolon_in_expressions_from_macros` lint.
macro_rules! with {
    ($i:expr, $var:ident: $submac:ident!( $($args:tt)* ) >> $($rest:tt)*) => {
        do_parse!($i, $var: $submac!($($args)*) >> r: $($rest)* >> (r))
    };
    ($i:expr, $var:ident: $submac:ident >> $($rest:tt)*) => {
        do_parse!($i, $var: $submac >> r: $($rest)* >> (r))
    };
}

// /api/v1/rooms/UUID/tracks/UUID
//
// Nested alternatives: each level first tries the longer form (one more path
// segment) and falls back to a `value!` that produces the request for the
// prefix parsed so far.
named!(apiv1<&str, ApiRequest>, preceded!(tag_s!("/api/v1"),
    alt_complete!(
        preceded!(tag_s!("/rooms"), alt_complete!(
            with!(room: uuid >> alt_complete!(
                preceded!(tag_s!("/tracks"), alt_complete!(
                    with!(track: uuid >> alt_complete!(
                        // ... sub track requests?
                        value!(ApiRequest::Track{room, track})
                    ))
                    |
                    value!(ApiRequest::Tracks{room})
                ))
                // other room requests
                |
                value!(ApiRequest::Room{room})
            ))
            |
            value!(ApiRequest::Rooms)
        ))
        // | ... other requests
    )
));

// Top-level entry point: parses a versioned API request, then allows one
// optional trailing "/" and requires the end of the input.
named!(api<&str, ApiRequest>, terminated!(
    alt_complete!(
        apiv1
        // | ... other versions
        // also could wrap in new enum like:
        //     apiv1 => { ApiRequest::V1 }
        //     |
        //     apiv2 => { ApiRequest::V2 }
    ),
    tuple!(
        alt_complete!(tag_s!("/") | value!("")), // ignore trailing "/"
        eof!() // make sure full URL was parsed
    )
));

fn main() {
    use nom::IResult::*;
    use nom::ErrorKind;

    let room = Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap();
    let track = Uuid::parse_str("83d235e8-03cd-420d-a8c6-6e42440a5573").unwrap();

    assert_eq!(api("/api/v1/rooms"), Done("", ApiRequest::Rooms));
    assert_eq!(api("/api/v1/rooms/"), Done("", ApiRequest::Rooms));
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
        Done("", ApiRequest::Room { room })
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/"),
        Done("", ApiRequest::Room { room })
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks"),
        Done("", ApiRequest::Tracks { room })
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/"),
        Done("", ApiRequest::Tracks { room })
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573"),
        Done("", ApiRequest::Track{room, track})
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573/"),
        Done("", ApiRequest::Track{room, track})
    );
    assert_eq!(api("/api/v1"), Error(ErrorKind::Alt));
    assert_eq!(api("/api/v1/foo"), Error(ErrorKind::Alt));
    assert_eq!(api("/api/v1/rooms/123"), Error(ErrorKind::Eof));
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/bar"),
        Error(ErrorKind::Eof)
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573/123"),
        Error(ErrorKind::Eof)
    );
    assert_eq!(api("/api/v2"), Error(ErrorKind::Alt));
}

您还可以使用更严格的alt_full_opt_slash!分支方法,这样可以确保分支仅在完全解析输入时才匹配。

然后您可以用更“扁平”的方式(尽管嵌套分支应该仍然可用)来解析各个备选项(不过这意味着您最终可能会把某些UUID解析不止一次;而且现在所有错误都是Alt类型):

/// Similar to alt_complete, but also requires the branch parses until
/// the end of the input (but ignores a trailing "/").
///
/// Implemented as a token muncher: the public entry rule (last) appends an
/// `__end` marker, the `__impl` rules peel off one `|`-separated branch at a
/// time, `__impl_push` wraps that branch, `__impl_push2` appends it to the
/// accumulated result, and the `__end` rule hands everything to
/// `alt_complete!`.
macro_rules! alt_full_opt_slash {
    // Append the first wrapped branch: nothing accumulated yet, so no `|`.
    (__impl_push2 ($i:expr,) ($($new:tt)*), $($rest:tt)*) => {
        alt_full_opt_slash!(__impl ($i, $($new)*), $($rest)*)
    };
    // Append a later wrapped branch, separated from the previous ones by `|`.
    (__impl_push2 ($i:expr, $($result:tt)+) ($($new:tt)*), $($rest:tt)*) => {
        alt_full_opt_slash!(__impl ($i, $($result)+ | $($new)*), $($rest)*)
    };
    // Wrap one branch so that it only matches when the input is fully parsed.
    // NOTE(review): branches of the form `parser => { gen }` are spliced into
    // `terminated!` including the `=> { gen }` part -- confirm `terminated!`
    // accepts that before relying on those forms.
    (__impl_push ($($result:tt)*) ($($new:tt)*), $($rest:tt)*) => {
        // modify branch:
        alt_full_opt_slash!(__impl_push2 ($($result)*) (
            terminated!(
                $($new)*,
                tuple!(
                    alt_complete!(tag_s!("/") | value!("")), // ignore trailing "/"
                    eof!() // make sure full URL was parsed
                )
            )
        ), $($rest)*)
    };
    // Branch form: a bare parser identifier.
    (__impl ($($result:tt)*), $e:ident | $($rest:tt)*) => {
        alt_full_opt_slash!(__impl_push ($($result)*) ( $e ), $($rest)*)
    };
    // Branch form: a sub-macro call.
    (__impl ($($result:tt)*), $subrule:ident!( $($args:tt)*) | $($rest:tt)*) => {
        alt_full_opt_slash!(__impl_push ($($result)*) ( $subrule!($($args)*) ), $($rest)*)
    };
    // Branch form: a sub-macro call with a `=> { gen }` mapping.
    (__impl ($($result:tt)*), $subrule:ident!( $($args:tt)* ) => { $gen:expr } | $($rest:tt)*) => {
        alt_full_opt_slash!(__impl_push ($($result)*) ( $subrule!($($args)*) => { $gen } ), $($rest)*)
    };
    // Branch form: a parser identifier with a `=> { gen }` mapping.
    (__impl ($($result:tt)*), $e:ident => { $gen:expr } | $($rest:tt)*) => {
        alt_full_opt_slash!(__impl_push ($($result)*) ( $e => { $gen } ), $($rest)*)
    };
    // All branches consumed: emit the final `alt_complete!`.
    (__impl ($i:expr, $($result:tt)*), __end) => {
        alt_complete!($i, $($result)*)
    };
    // Public entry point: start with an empty result and mark the end.
    ($i:expr, $($rest:tt)*) => {{
        alt_full_opt_slash!(__impl ($i, ), $($rest)* | __end)
    }};
}

// /api/v1/rooms/UUID/tracks/UUID
//
// Flat variant: every request shape is its own complete branch, so the shared
// "/rooms" prefix (and possibly the room UUID) is parsed again by each
// branch that is tried.
named!(apiv1<&str, ApiRequest>, preceded!(tag_s!("/api/v1"),
    alt_full_opt_slash!(
        // /rooms
        do_parse!(
            tag_s!("/rooms") >>
            (ApiRequest::Rooms)
        )
        |
        // /rooms/UUID
        do_parse!(
            tag_s!("/rooms") >>
            room: uuid >>
            (ApiRequest::Room{room})
        )
        |
        // /rooms/UUID/tracks
        do_parse!(
            tag_s!("/rooms") >>
            room: uuid >>
            tag_s!("/tracks") >>
            (ApiRequest::Tracks{room})
        )
        |
        // /rooms/UUID/tracks/UUID
        do_parse!(
            tag_s!("/rooms") >>
            room: uuid >>
            tag_s!("/tracks") >>
            track: uuid >>
            (ApiRequest::Track{room, track})
        )
    )
));

// Top-level entry point for the flat variant. There is no `terminated!`
// wrapper here; the trailing-"/" and end-of-input checks are attached to each
// branch by `alt_full_opt_slash!` inside `apiv1`.
named!(api<&str, ApiRequest>, alt_complete!(
    apiv1
    // | ... other versions
    // also could wrap in new enum like:
    //     apiv1 => { ApiRequest::V1 }
    //     |
    //     apiv2 => { ApiRequest::V2 }
));