R编程 - 如何使用gregexpr删除在文本中多次出现的字符串?

时间:2017-05-29 02:56:13

标签: r grep

示例:在下面的例子中,我想要实现的是删除所有以单词 'Henry' 开头、句子中间包含单词 'new'、并且以 'pen.' 结尾的句子。

result = gsub(pattern='((Henry).*(new).*(pen))+',replacement='',text)

我做了什么:

"Henry's pen costs him $2."

我想要实现的目标:

/// Base view controller that hosts a slide-out menu.
///
/// It installs a menu button in the navigation bar, shows or hides a
/// `MenuViewController` as a child controller, and reacts to menu selections
/// (delivered through `SlideMenuDelegate`) by pushing the storyboard scene
/// with the matching identifier onto the navigation stack.
class BaseViewController: UIViewController, SlideMenuDelegate {

    /// Tag value stored on the menu button while the menu is on screen.
    private let menuOpenTag = 10

    /// Duration of the slide-in / slide-out animation, in seconds.
    private let menuAnimationDuration: TimeInterval = 0.3

    override func viewDidLoad() {
        super.viewDidLoad()
        // Do any additional setup after loading the view.
    }

    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
        // Dispose of any resources that can be recreated.
    }

    // MARK: - SlideMenuDelegate

    /// Handles a menu selection.
    ///
    /// Indices 0...3 open the "Locations", "Offers", "Feedback" and "About"
    /// scenes. Index 4 logs the user out (removes every key reported by
    /// `UserDefaults.standard`, resets the current Facebook token/profile when
    /// a token is present) and opens the "SocialLogin" scene. Any other index
    /// only logs `default`.
    ///
    /// - Parameter index: Index of the selected menu row.
    func slideMenuItemSelectedAtIndex(_ index: Int32) {
        // Purely diagnostic; skipped when there is no navigation stack instead
        // of crashing on a force unwrap.
        if let topViewController = navigationController?.topViewController {
            print("View Controller is : \(topViewController) \n", terminator: "")
        }

        switch index {
        case 0:
            print("Locations\n", terminator: "")
            openViewControllerBasedOnIdentifier("Locations")
        case 1:
            print("Offers\n", terminator: "")
            openViewControllerBasedOnIdentifier("Offers")
        case 2:
            print("Feedback\n", terminator: "")
            openViewControllerBasedOnIdentifier("Feedback")
        case 3:
            print("About\n", terminator: "")
            openViewControllerBasedOnIdentifier("About")
        case 4:
            let defaults = UserDefaults.standard
            for key in defaults.dictionaryRepresentation().keys {
                defaults.removeObject(forKey: key)
            }
            // Facebook logout
            if FBSDKAccessToken.current() != nil {
                FBSDKAccessToken.setCurrent(nil)
                FBSDKProfile.setCurrent(nil)
            }
            openViewControllerBasedOnIdentifier("SocialLogin")
        default:
            print("default\n", terminator: "")
        }
    }

    // MARK: - Navigation

    /// Instantiates the storyboard scene named `strIdentifier` and pushes it,
    /// unless the controller currently on top of the navigation stack has the
    /// same restoration identifier (in which case "Same VC" is logged).
    ///
    /// Does nothing except log when this controller has no storyboard or is
    /// not embedded in a navigation controller.
    ///
    /// - Parameter strIdentifier: Storyboard identifier of the scene to open.
    func openViewControllerBasedOnIdentifier(_ strIdentifier: String) {
        guard let storyboard = storyboard, let navigationController = navigationController else {
            print("Cannot open \(strIdentifier): missing storyboard or navigation controller")
            return
        }

        let destViewController = storyboard.instantiateViewController(withIdentifier: strIdentifier)

        // Only treat the scenes as identical when BOTH restoration identifiers
        // are set and equal; a missing identifier must not crash or block the push.
        if let currentID = navigationController.topViewController?.restorationIdentifier,
            let destinationID = destViewController.restorationIdentifier,
            currentID == destinationID {
            print("Same VC")
            return
        }

        navigationController.pushViewController(destViewController, animated: true)
    }

    // MARK: - Menu button

    /// Installs a 30x30 system button showing `defaultMenuImage()` as the left
    /// bar button item; tapping it calls `onSlideMenuButtonPressed(_:)`.
    func addSlideMenuButton() {
        let btnShowMenu = UIButton(type: .system)
        btnShowMenu.setImage(defaultMenuImage(), for: .normal)
        btnShowMenu.frame = CGRect(x: 0, y: 0, width: 30, height: 30)
        btnShowMenu.addTarget(self,
                              action: #selector(BaseViewController.onSlideMenuButtonPressed(_:)),
                              for: .touchUpInside)
        navigationItem.leftBarButtonItem = UIBarButtonItem(customView: btnShowMenu)
    }

    /// Draws the 30x22 menu glyph: three 1pt black bars at y = 3, 10, 17, each
    /// with a 1pt white bar directly below it.
    ///
    /// - Returns: The rendered image, or an empty `UIImage` if the graphics
    ///   context yields no image.
    func defaultMenuImage() -> UIImage {
        let barWidth: CGFloat = 30
        let barOffsets: [CGFloat] = [3, 10, 17]

        UIGraphicsBeginImageContextWithOptions(CGSize(width: barWidth, height: 22), false, 0.0)
        // Runs after the return expression is evaluated, on every exit path.
        defer { UIGraphicsEndImageContext() }

        UIColor.black.setFill()
        for y in barOffsets {
            UIBezierPath(rect: CGRect(x: 0, y: y, width: barWidth, height: 1)).fill()
        }

        UIColor.white.setFill()
        for y in barOffsets {
            UIBezierPath(rect: CGRect(x: 0, y: y + 1, width: barWidth, height: 1)).fill()
        }

        return UIGraphicsGetImageFromCurrentImageContext() ?? UIImage()
    }

    /// Toggles the slide-out menu.
    ///
    /// The button's `tag` carries the state: 10 means the menu is showing, so a
    /// tap slides it off screen and removes it; any other value presents a new
    /// `MenuViewController` with a slide-in animation.
    ///
    /// - Parameter sender: The menu button that was tapped.
    @objc func onSlideMenuButtonPressed(_ sender: UIButton) {
        if sender.tag == menuOpenTag {
            hideMenu(triggeredBy: sender)
        } else {
            showMenu(triggeredBy: sender)
        }
    }

    /// Slides the top-most subview (the menu, as added by `showMenu`) off
    /// screen, then removes it and the child controller that owns it.
    private func hideMenu(triggeredBy sender: UIButton) {
        // To hide the menu if it is already there.
        slideMenuItemSelectedAtIndex(-1)
        sender.tag = 0

        guard let viewMenuBack = view.subviews.last else { return }

        // The menu was added as a child controller; detach it as well so it is
        // not retained after its view is gone.
        let menuOwner = childViewControllers.first(where: { $0.isViewLoaded && $0.view === viewMenuBack })
        menuOwner?.willMove(toParentViewController: nil)

        UIView.animate(withDuration: menuAnimationDuration, animations: {
            var frameMenu = viewMenuBack.frame
            frameMenu.origin.x = -1 * UIScreen.main.bounds.size.width
            viewMenuBack.frame = frameMenu
            viewMenuBack.layoutIfNeeded()
            viewMenuBack.backgroundColor = UIColor.clear
        }, completion: { _ in
            viewMenuBack.removeFromSuperview()
            menuOwner?.removeFromParentViewController()
        })
    }

    /// Instantiates `MenuViewController` from the storyboard, embeds it as a
    /// child controller and slides it in from the left edge.
    private func showMenu(triggeredBy sender: UIButton) {
        // Resolve the menu first so a failure cannot leave the button disabled
        // or tagged as "open".
        guard let menuVC = storyboard?.instantiateViewController(withIdentifier: "MenuViewController") as? MenuViewController else {
            print("Cannot show menu: MenuViewController is unavailable")
            return
        }

        sender.isEnabled = false
        sender.tag = menuOpenTag

        menuVC.btnMenu = sender
        menuVC.delegate = self

        addChildViewController(menuVC)
        view.addSubview(menuVC.view)
        menuVC.view.layoutIfNeeded()

        let screenSize = UIScreen.main.bounds.size
        // Start fully off screen to the left.
        menuVC.view.frame = CGRect(x: 0 - screenSize.width, y: 0, width: screenSize.width, height: screenSize.height)
        menuVC.didMove(toParentViewController: self)

        UIView.animate(withDuration: menuAnimationDuration, animations: {
            menuVC.view.frame = CGRect(x: 0, y: 0, width: screenSize.width, height: screenSize.height)
        }, completion: { _ in
            // Re-enable only once the slide-in has finished; `isEnabled` is not
            // animatable, so setting it inside the animation block took effect
            // immediately.
            sender.isEnabled = true
        })
    }
}

我实际得到的结果:

""

我不太确定我的代码出了什么问题,有人能指出我正确的方向吗?

2 个答案:

答案 0 :(得分:1)

正如@thelatemail建议的那样,您可以先用下面的代码在句点(.)处拆分text,从而得到一个由句子组成的向量:
strsplit(text, "(?<=\\.)\\s+", perl = TRUE)

其中模式"(?<=\\.)\\s+"表示我们在句点(.)之后的空白(\\s+)处进行拆分(由lookbehind断言(?<=\\.)保证空白前面是一个句点)。完成拆分之后,我们可以检查每个句子是否符合您的条件,并过滤掉符合条件的句子。然后只需将剩余的句子重新粘贴在一起:

library(magrittr)
filteredText <- strsplit(text, "(?<=\\.)\\s+", perl = TRUE)[[1]] %>%
        grep(pattern = "^Henry.*new.*pen\\.$", x = ., value = TRUE, invert = TRUE) %>%
        paste(collapse = " ")
# 
filteredText
# [1] "Henry's pen costs him $2."

答案 1 :(得分:0)

您需要按句子进行分词。您可以使用strsplit并以'\\.'作为分隔符来近似实现,但这种做法在某些文本上会失败,例如它不会在?处拆分,却会把U.S.A.拆开。不过,如今使用更好的句子分词器并不难,这要归功于tidytext,它把tokenizers包方便地封装进了tidy框架中。

你可以标记为句子,然后使用正则表达式:

library(tidyverse)
library(tidytext)

text = 'Henry just bought a new black pen. Henry\'s pen costs him $2. Henry buys a new blue pen.'

data_frame(text) %>% 
    unnest_tokens(sentence, text, 'sentences', to_lower = FALSE) %>% 
    filter(!grepl('^Henry ', sentence), 
           !grepl('.new.{2,}', sentence),
           !grepl('pen.$', sentence))
#> # A tibble: 1 x 1
#>                    sentence
#>                       <chr>
#> 1 Henry's pen costs him $2.

...或者进一步分词为单词,以便使用更基本的比较:

data_frame(text) %>% 
    unnest_tokens(sentence, text, 'sentences', to_lower = FALSE) %>% 
    unnest_tokens(word, sentence, drop = FALSE) %>% 
    group_by(sentence) %>% 
    filter(first(word) != 'henry',
           !'new' %in% word,
           last(word) != 'pen')
#> # A tibble: 5 x 2
#> # Groups:   sentence [1]
#>                    sentence    word
#>                       <chr>   <chr>
#> 1 Henry's pen costs him $2. henry's
#> 2 Henry's pen costs him $2.     pen
#> 3 Henry's pen costs him $2.   costs
#> 4 Henry's pen costs him $2.     him
#> 5 Henry's pen costs him $2.       2