
时间:2018-06-26 16:40:57

标签: r text stringi


Patient Name- John Smith
Number of dx codes: 123
Number of pr codes: 678
Charges: 910
Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Duis arcu ipsum, ultrices placerat mattis ac, venenatis eu magna. 
Donec interdum iaculis lacus. Nunc in placerat augue. 
In ut odio et dui aliquam sagittis at id augue. 
Patient Name- Jane Smith
Number of dx codes: 234
Number of pr codes: 567
Charges: 1011


Patient Name    DxCodes    PrCodes    Charges
John Smith      123        678        910
Jane Smith      234        567        1011


John Smith
Jane Smith

Number of dx codes: 123
Number of pr codes: 678
Charges: 910
Number of dx codes: 234
Number of pr codes: 567
Charges: 1011

但我不确定如何将上述两个数据框转换为所需的格式。我是否应该从一开始就采用其他方法?非常感谢任何帮助。谢谢!

3 个答案:

答案 0 :(得分:3)


# Build one row per patient from the raw report lines held in `txt`.
# Each field is located by its label; prose lines that do not carry any of
# the labels are ignored.

# Return the integer that follows the colon on every line of `txt`
# matching `label`.
extract_count <- function(label) {
  hits <- txt[grepl(label, txt)]
  as.integer(sub("^.*: *([[:digit:]]*)$", "\\1", hits))
}

# The name is whatever follows the "Patient Name" prefix and its dash/space
# separator.
PatientName <- sub("^Patient Name[- ]*", "", txt[grepl("Patient Name", txt)])

DxCodes <- extract_count("Number of dx codes:")
PrCodes <- extract_count("Number of pr codes:")
Charges <- extract_count("Charges")

result <- data.frame(
  PatientName = PatientName,
  DxCodes = DxCodes,
  PrCodes = PrCodes,
  Charges = Charges
)
#  PatientName DxCodes PrCodes Charges
#1  John Smith     123     678     910
#2  Jane Smith     234     567    1011


# Sample input: two patient records separated by free-text lines.
# The literal starts with a newline, so the first element of `txt` is an
# empty string; that is harmless because the fields are located by label.
conn <- textConnection("
Patient Name- John Smith
Number of dx codes: 123
Number of pr codes: 678
Charges: 910
Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Duis arcu ipsum, ultrices placerat mattis ac, venenatis eu magna. 
Donec interdum iaculis lacus. Nunc in placerat augue. 
In ut odio et dui aliquam sagittis at id augue. 
Patient Name- Jane Smith
Number of dx codes: 234
Number of pr codes: 567
Charges: 1011")

txt <- readLines(conn)
# Release the connection once the lines have been read.
close(conn)

答案 1 :(得分:1)



public class ServerApplication {
    public static void main(String[] args) {
        MyResourceLoader rLoader = new MyResourceLoader();
        SpringApplication app = new SpringApplicationBuilder().build();

/**
 * Minimal controller used as the class that the custom class loader treats
 * specially.
 * NOTE(review): any framework annotations are not present in the snippet.
 */
public class UserController {
    /**
     * Prints a marker line to standard output.
     *
     * @return the literal string {@code "hello"}
     */
    public String hello() {
        System.out.println("hello hello hello...");
        return "hello";
    }
}

public class MyResourceLoader extends DefaultResourceLoader {
    private ClassLoader cl = new MyClassLoader();
    public Resource getResource(String location) {
        System.out.println("getResource: "+location);
        return super.getResource(location);
    public ClassLoader getClassLoader() {
        return cl;

public class MyClassLoader extends ClassLoader {
    public Class<?> loadClass(String name) throws ClassNotFoundException {
        if (name.equals("com.abc.bootbycustomloader.controller.UserController")) {
            // assump that UserController is the encrypted class
            // i need to load this encrypted class, and decrypted it!
            System.out.println("!!!!!encrypted!!!!! : " + name);

            // load the class from a special place, mock the decrypted processing
            String path = "D:\\_clz\\UserController.class";
            byte[] data = new byte[0];
            try {
                data = Files.readAllBytes(Paths.get(path));
            } catch (IOException e) {
            // mock decrypted processing success, return the decrypted class
            Class<?> clz = defineClass(name, data, 0, data.length);  
            return clz;
        } else {
            // assump that other class is not encrypted class
            // just load it as usual
            return super.loadClass(name);


# Sample input: two patient records separated by free-text lines.
txt <- c(
  "Patient Name- John Smith",
  "Number of dx codes: 123",
  "Number of pr codes: 678",
  "Charges: 910",
  "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ",
  "Duis arcu ipsum, ultrices placerat mattis ac, venenatis eu magna. ",
  "Donec interdum iaculis lacus. Nunc in placerat augue. ",
  "In ut odio et dui aliquam sagittis at id augue. ",
  "Patient Name- Jane Smith",
  "Number of dx codes: 234",
  "Number of pr codes: 567",
  "Charges: 1011"
)

# Every "Patient Name" line starts a new group, so the running count of such
# lines serves as the group id for split().
patients <- split(txt, cumsum(grepl("^Patient Name", txt)))
# List of 2
#  $ 1: chr [1:8] "Patient Name- John Smith" "Number of dx codes: 123" "Number of pr codes: 678" "Charges: 910" ...
#  $ 2: chr [1:4] "Patient Name- Jane Smith" "Number of dx codes: 234" "Number of pr codes: 567" "Charges: 1011"

# Turn each group into a one-row data frame. The first four lines of a group
# are taken positionally as name, dx, pr and charges; anything after that is
# kept as free text in `rest`.
patients2 <- lapply(patients, function(pat) {
  # Integer that follows the colon on a "label: value" line.
  count_after_colon <- function(line) {
    as.integer(trimws(strsplit(line, ":")[[1]][2]))
  }

  # Strip everything up to the first dash only, so a hyphenated name
  # (e.g. "Mary-Jane") stays in one piece and yields a single row.
  nm <- trimws(sub("^[^-]*-", "", pat[1]))

  data.frame(
    name = nm,
    dx = count_after_colon(pat[2]),
    pr = count_after_colon(pat[3]),
    charges = count_after_colon(pat[4]),
    rest = paste(pat[-(1:4)], collapse = "\n"),
    stringsAsFactors = FALSE
  )
})
# List of 2
#  $ 1:'data.frame':    1 obs. of  5 variables:
#   ..$ name   : chr "John Smith"
#   ..$ dx     : int 123
#   ..$ pr     : int 678
#   ..$ charges: int 910
#   ..$ rest   : chr "Lorem ipsum dolor sit amet, consectetur adipiscing elit. \nDuis arcu ipsum, ultrices placerat mattis ac, venenatis eu magna. \n"| __truncated__
#  $ 2:'data.frame':    1 obs. of  5 variables:
#   ..$ name   : chr "Jane Smith"
#   ..$ dx     : int 234
#   ..$ pr     : int 567
#   ..$ charges: int 1011
#   ..$ rest   : chr ""

答案 2 :(得分:1)


# The whole report held as a single string (despite the name, `df` is text,
# not a data frame).
df <- " 
Patient Name- John Smith
Number of dx codes: 123
Number of pr codes: 678
Charges: 910
Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Duis arcu ipsum, ultrices placerat mattis ac, venenatis eu magna. 
Donec interdum iaculis lacus. Nunc in placerat augue. 
In ut odio et dui aliquam sagittis at id augue. 
Patient Name- Jane Smith
Number of dx codes: 234
Number of pr codes: 567
Charges: 1011"

# Return, as integers, every run of digits in `text` that directly follows
# "<label>: ". A look-behind keeps the label out of the match, so no
# trimming is needed afterwards. Base R only; no extra package required.
extract_counts <- function(text, label) {
  pattern <- paste0("(?<=", label, ": )[[:digit:]]+")
  as.integer(regmatches(text, gregexpr(pattern, text, perl = TRUE))[[1]])
}

# One row per patient, in order of appearance in the text.
df_b <- data.frame(
  dx = extract_counts(df, "dx codes"),
  pr = extract_counts(df, "pr codes"),
  charges = extract_counts(df, "Charges")
)
#    dx  pr charges
# 1 123 678     910
# 2 234 567    1011