如何使用stringr操作非结构化文本数据?

时间:2018-02-03 15:19:59

标签: r dplyr tidyr stringr text-manipulation

我有一个非结构化数据,我希望对其进行操作,使其成为可用于分析的数据。

以下是我的数据框,其中包含97个观察值和5个变量,即 DRUG_NAME、DRUG_STRENGTH_NO、DRUG_STRENGTH_UNIT、INSTRUCTIONS 和 EXPANDED_INSTRUCTIONS:

df <- structure(list(DRUG_NAME = c("ALPHACALCIDOL 0.25MCG TAB", "ALPHACALCIDOL 0.25MCG TAB", 
"ALPHACALCIDOL 0.25MCG TAB", "ALPHACALCIDOL 0.25MCG TAB", "ALPHACALCIDOL 0.25MCG TAB", 
"ALPHACALCIDOL 0.25MCG TAB", "ALPHACALCIDOL 0.25MCG TAB", "CALCIUM ACETATE 667MG TAB", 
"CALCIUM ACETATE 667MG TAB", "CALCIUM ACETATE 667MG TAB", "CALCIUM ACETATE 667MG TAB", 
"CALCIUM ACETATE 667MG TAB", "CALCIUM ACETATE 667MG TAB", "CALCIUM ACETATE 667MG TAB", 
"CALCIUM ACETATE 667MG TAB", "CALCIUM ACETATE 667MG TAB", "CALCIUM ACETATE 667MG TAB", 
"CALCIUM ACETATE 667MG TAB", "CALCIUM CARB 1250MG CHEWABLE TAB", 
"CALCIUM CARB 1250MG CHEWABLE TAB", "CALCIUM CARB 1250MG CHEWABLE TAB", 
"CALCIUM CARB 1250MG CHEWABLE TAB", "CALCIUM CARB 1250MG CHEWABLE TAB", 
"CALCIUM CARB 1250MG CHEWABLE TAB", "CALCIUM CARB 1250MG CHEWABLE TAB", 
"CALCIUM CARB 625MG TABLET", "CALCIUM CARB 625MG TABLET", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
"ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
"ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
"ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
"ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
"ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
"ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
"ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
"ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
"ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "FERROUS FUMARATE 200MG TABLET", 
"FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
"FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
"FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
"FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
"FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
"FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
"FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
"FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
"FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", 
"FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", 
"FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", 
"FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", 
"FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", 
"FUROSEMIDE 40MG TABLET", "HYDROCHLOROTHIAZIDE 25MG TABLET", 
"HYDROCHLOROTHIAZIDE 25MG TABLET", "HYDROCHLOROTHIAZIDE 25MG TABLET", 
"HYDROCHLOROTHIAZIDE 25MG TABLET", "HYDROCHLOROTHIAZIDE 25MG TABLET", 
"SODIUM BICARBONATE 500MG CAP", "SODIUM BICARBONATE 500MG CAP", 
"SODIUM BICARBONATE 500MG CAP", "SODIUM BICARBONATE 500MG CAP", 
"SODIUM BICARBONATE 500MG CAP", "SODIUM BICARBONATE 500MG CAP", 
"SODIUM BICARBONATE 500MG CAP", "SODIUM BICARBONATE 500MG CAP", 
"SODIUM BICARBONATE 500MG CAP", "SPIRONOLACTONE 25MG TABLET", 
"SPIRONOLACTONE 25MG TABLET", "SPIRONOLACTONE 25MG TABLET", "SPIRONOLACTONE 25MG TABLET", 
"SPIRONOLACTONE 25MG TABLET"), DRUG_STRENGTH_NO = c(0.25, 0.25, 
0.25, 0.25, 0.25, 0.25, 0.25, 667, 667, 667, 667, 667, 667, 667, 
667, 667, 667, 667, 1250, 1250, 1250, 1250, 1250, 1250, 1250, 
625, 625, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 
4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 200, 200, 
200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 
200, 200, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 
40, 40, 40, 25, 25, 25, 25, 25, 500, 500, 500, 500, 500, 500, 
500, 500, 500, 25, 25, 25, 25, 25), DRUG_STRENGTH_UNIT = c("MCG", 
"MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MG", "MG", "MG", "MG", 
"MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", 
"MG", "MG", "MG", "MG", "MG", "IU", "IU", "IU", "IU", "IU", "IU", 
"IU", "IU", "IU", "IU", "IU", "IU", "IU", "IU", "IU", "IU", "IU", 
"IU", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", 
"MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", 
"MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", 
"MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", 
"MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG"), INSTRUCTIONS = c("0.25MCG BIW NR", 
"0.25mcg om", "0.25MCG tiw", "0.25MCG TIW NR", "1 BIW NR", "1 OM", 
"1 TIW", "1 2T", "1 3T", "1 3T WITH MEALS", "1 BD", "1 TDS WITH MEALS", 
"1TAB 3T", "1TAB 3T", "1TAB 3T", "1TAB 3T", "1TAB 3T", "2TAB 3T", 
"1 3T", "1 3t nr", "1 3T WITH MEALS", "1 OM & 2 OA & 2 ON", "1.25G 3T", 
"1.25G 3T", "1.25G 3T NR", "1250MG OM", "2 OM", "1", "1", "1", 
"1", "1 BIW", "1 BIW NR", "1 EOW", "1 EOW", "1 oi", "1 once", 
"1 TAD", "1 TAD", "4000UNIT BIW", "4000UNIT BIW NR", "4000UNIT EOW", 
"4000UNIT OIW", "USE AS DIRECTED.", "(blank)", "1 2T", "1 2T", 
"1 OM", "2 2T", "2 2T NR", "200MG 2T", "200MG 2T NR", "200MG BD", 
"200MG OM", "200MG OM", "200MG TIW", "200MG TIW nr", "400MG 2T", 
"400MG 2T", "400MG 2T NR", "400mg bd", "400MG OM", "1 2T", "1 OM", 
"1 OM", "1 OM PRN", "1.5 2t", "2 2T", "20MG OM", "20MG OM", "20MG OM", 
"40MG 2T", "40MG 2T", "40mg od", "40MG OM", "60MG 2T", "60mg bd", 
"80mg 3t", ".5 om", "0.5 OM", "12.5MG 2T", "12.5MG OM", "12.5MG OM NR", 
"1 2T", "1 3T", "1000MG 3T", "1G TDS", "2 3t", "500MG 2T", "500MG 2T NR", 
"500MG 3T", "500mg tds", "1 OM", "1 OM NR", "12.5MG OM", "12.5MG OM", 
"25MG OM"), EXPANDED_INSTRUCTIONS = c("TAKE ONE TABLET TWICE WEEKLY (NON-REFUNDABLE)", 
"TAKE ONE TABLET EVERY MORNING", "TAKE ONE TABLET THREE TIMES WEEKLY", 
"TAKE ONE TABLET THREE TIMES WEEKLY (NON-REFUNDABLE)", "TAKE ONE TABLET TWICE WEEKLY (NON-REFUNDABLE)", 
"TAKE ONE TABLET EVERY MORNING", "TAKE ONE TABLET THREE TIMES WEEKLY", 
"TAKE ONE TABLET 2 TIMES DAILY", "TAKE ONE TABLET 3 TIMES DAILY", 
"TAKE ONE TABLET 3 TIMES DAILY WITH MEALS", "TAKE ONE TABLET TWICE DAILY", 
"TAKE ONE TABLET THREE TIMES DAILY WITH MEALS", "TAKE ONE TABLET 3 TIMES DAILY", 
"TAKE ONE TABLET 3 TIMES DAILY (WITH MEALS)", "TAKE ONE TABLET 3 TIMES DAILY on meals", 
"TAKE ONE TABLET 3 TIMES DAILY WITH MEALS", "TAKE ONE TABLET 3 TIMES DAILY with meals, started on 16/4/2010", 
"TAKE TWO TABLETS 3 TIMES DAILY", "TAKE ONE TABLET 3 TIMES DAILY", 
"TAKE ONE TABLET 3 TIMES DAILY (NON-REFUNDABLE)", "TAKE ONE TABLET 3 TIMES DAILY WITH MEALS", 
"TAKE ONE TABLET EVERY MORNING AND TWO TABLETS EVERY AFTERNOON AND TWO TABLETS EVERY NIGHT", 
"TAKE ONE TABLET 3 TIMES DAILY", "TAKE ONE TABLET 3 TIMES DAILY WITH MEALS", 
"TAKE ONE TABLET 3 TIMES DAILY (NON-REFUNDABLE)", "TAKE TWO TABLETS EVERY MORNING", 
"TAKE TWO TABLETS EVERY MORNING", "INJECT CONTENTS OF ONE SYRINGE EVERY 10 DAYS", 
"INJECT CONTENTS OF ONE SYRINGE EVERY 10 DAYS.", "INJECT CONTENTS OF ONE SYRINGE EVERY 2 WEEKS.", 
"INJECT CONTENTS OF ONE SYRINGE EVERY TWO WEEKS", "INJECT CONTENTS OF ONE SYRINGE TWICE WEEKLY", 
"INJECT CONTENTS OF ONE SYRINGE TWICE WEEKLY (NON-REFUNDABLE)", 
"INJECT CONTENTS OF ONE SYRINGE EVERY FORTNIGHTLY", "INJECT ONE SYRINGE EVERY FORTNIGHTLY", 
"INJECT CONTENTS OF ONE SYRINGE ONCE EVERY TEN DAYS", "INJECT CONTENTS OF ONE SYRINGE ONCE EVERY 10 DAYS", 
"INJECT CONTENTS OF ONE SYRINGE AS DIRECTED (ONCE EVERY 10 DAYS)", 
"INJECT CONTENTS OF ONE SYRINGE AS DIRECTED EVERY 10 DAYS", "INJECT CONTENTS OF ONE SYRINGE TWICE WEEKLY", 
"INJECT CONTENTS OF ONE SYRINGE TWICE WEEKLY (NON-REFUNDABLE)", 
"INJECT CONTENTS OF ONE SYRINGE EVERY FORTNIGHTLY", "INJECT CONTENTS OF ONE SYRINGE ONCE WEEKLY", 
"USE AS DIRECTED. ONCE EVERY 10 DAYS", "INSERT ONE SYRINGE EVERY 10 DAYS AS DIRECTED", 
"TAKE ONE TABLET 2 TIMES DAILY", "TAKE ONE TABLET 2 TIMES DAILY [STOP 2 WEEKS PRIOR TO COLONOSCOPY]", 
"TAKE ONE TABLET EVERY MORNING", "TAKE TWO TABLETS 2 TIMES DAILY", 
"TAKE TWO TABLETS 2 TIMES DAILY (NON-REFUNDABLE)", "TAKE ONE TABLET 2 TIMES DAILY", 
"TAKE ONE TABLET 2 TIMES DAILY (NON-REFUNDABLE)", "TAKE ONE TABLET TWICE DAILY", 
"TAKE ONE TABLET EVERY MORNING", "TAKE ONE TABLET EVERY MORNING REDUCE. STOP AT NEXT TCU", 
"TAKE ONE TABLET THREE TIMES WEEKLY", "TAKE ONE TABLET THREE TIMES WEEKLY (NON-REFUNDABLE)", 
"TAKE TWO TABLETS 2 TIMES DAILY", "TAKE TWO TABLETS 2 TIMES DAILY TAKE DAILY", 
"TAKE TWO TABLETS 2 TIMES DAILY (NON-REFUNDABLE)", "TAKE TWO TABLETS TWICE DAILY", 
"TAKE TWO TABLETS EVERY MORNING", "TAKE ONE TABLET 2 TIMES DAILY", 
"TAKE ONE TABLET EVERY MORNING", "TAKE ONE TABLET EVERY MORNING prn for leg swelling", 
"TAKE ONE TABLET EVERY MORNING WHEN NECESSARY FOR LEG SWELLING", 
"TAKE ONE AND A HALF TABLETS 2 TIMES DAILY", "TAKE TWO TABLETS 2 TIMES DAILY", 
"TAKE HALF TABLET EVERY MORNING", "TAKE HALF TABLET EVERY MORNING DOSE DECREASED ON 08 10 2015", 
"TAKE HALF TABLET EVERY MORNING started 9/11/12 for edema/ Decreased enalapril 9/11/12", 
"TAKE ONE TABLET 2 TIMES DAILY", "TAKE ONE TABLET 2 TIMES DAILY Dose decreased on 27/1/2010. stop span K", 
"TAKE ONE TABLET ONCE DAILY", "TAKE ONE TABLET EVERY MORNING", 
"TAKE ONE AND A HALF TABLETS 2 TIMES DAILY", "TAKE ONE AND A HALF TABLETS TWICE DAILY", 
"TAKE TWO TABLETS 3 TIMES DAILY", "TAKE HALF TABLET EVERY MORNING", 
"TAKE HALF TABLET EVERY MORNING", "TAKE HALF TABLET 2 TIMES DAILY", 
"TAKE HALF TABLET EVERY MORNING", "TAKE HALF TABLET EVERY MORNING (NON-REFUNDABLE)", 
"TAKE ONE CAPSULE 2 TIMES DAILY", "TAKE ONE CAPSULE 3 TIMES DAILY", 
"TAKE TWO CAPSULES 3 TIMES DAILY", "TAKE TWO CAPSULES THREE TIMES DAILY", 
"TAKE TWO CAPSULES 3 TIMES DAILY", "TAKE ONE CAPSULE 2 TIMES DAILY", 
"TAKE ONE CAPSULE 2 TIMES DAILY (NON-REFUNDABLE)", "TAKE ONE CAPSULE 3 TIMES DAILY", 
"TAKE ONE CAPSULE THREE TIMES DAILY", "TAKE ONE TABLET EVERY MORNING", 
"TAKE ONE TABLET EVERY MORNING (NON-REFUNDABLE)", "TAKE HALF TABLET EVERY MORNING", 
"TAKE HALF TABLET EVERY MORNING NEW, STARTED ON 20/4/15,", "TAKE ONE TABLET EVERY MORNING"
)), class = "data.frame", row.names = c(NA, -97L), .Names = c("DRUG_NAME", 
"DRUG_STRENGTH_NO", "DRUG_STRENGTH_UNIT", "INSTRUCTIONS", "EXPANDED_INSTRUCTIONS"
))

我想根据变量 EXPANDED_INSTRUCTIONS 计算每天、每周、每两周或每月的总剂量。该值由药物强度乘以片剂/胶囊/注射器的数量,再乘以用药频率得到。

  1. 如果患者服用“ALPHACALCIDOL 0.25MCG TAB”,且说明为“TAKE ONE TABLET TWICE WEEKLY”(每周两次,每次一片),那么他每周的总剂量为 0.5MCG。
  2. 如果药物是“ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)”,且说明为“INJECT CONTENTS OF ONE SYRINGE EVERY 10 DAYS”(每10天注射一支),则每月剂量为 12000IU。
  3. 如果药物是“ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)”,且说明为“INJECT CONTENTS OF ONE SYRINGE EVERY FORTNIGHTLY”(每两周注射一支),则每两周的剂量为 4000IU。
  4. 我想要的数据框如下:

    structure(list(DRUG_NAME = c("ALPHACALCIDOL 0.25MCG TAB", "ALPHACALCIDOL 0.25MCG TAB", 
    "ALPHACALCIDOL 0.25MCG TAB", "ALPHACALCIDOL 0.25MCG TAB", "ALPHACALCIDOL 0.25MCG TAB", 
    "ALPHACALCIDOL 0.25MCG TAB", "ALPHACALCIDOL 0.25MCG TAB", "CALCIUM ACETATE 667MG TAB", 
    "CALCIUM ACETATE 667MG TAB", "CALCIUM ACETATE 667MG TAB", "CALCIUM ACETATE 667MG TAB", 
    "CALCIUM ACETATE 667MG TAB", "CALCIUM ACETATE 667MG TAB", "CALCIUM ACETATE 667MG TAB", 
    "CALCIUM ACETATE 667MG TAB", "CALCIUM ACETATE 667MG TAB", "CALCIUM ACETATE 667MG TAB", 
    "CALCIUM ACETATE 667MG TAB", "CALCIUM CARB 1250MG CHEWABLE TAB", 
    "CALCIUM CARB 1250MG CHEWABLE TAB", "CALCIUM CARB 1250MG CHEWABLE TAB", 
    "CALCIUM CARB 1250MG CHEWABLE TAB", "CALCIUM CARB 1250MG CHEWABLE TAB", 
    "CALCIUM CARB 1250MG CHEWABLE TAB", "CALCIUM CARB 1250MG CHEWABLE TAB", 
    "CALCIUM CARB 625MG TABLET", "CALCIUM CARB 625MG TABLET", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
    "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
    "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
    "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
    "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
    "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
    "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
    "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
    "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", 
    "ERYTHROPOIETIN ALFA 4,000 IU INJ (EPREX)", "FERROUS FUMARATE 200MG TABLET", 
    "FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
    "FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
    "FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
    "FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
    "FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
    "FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
    "FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
    "FERROUS FUMARATE 200MG TABLET", "FERROUS FUMARATE 200MG TABLET", 
    "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", 
    "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", 
    "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", 
    "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", 
    "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", "FUROSEMIDE 40MG TABLET", 
    "FUROSEMIDE 40MG TABLET", "HYDROCHLOROTHIAZIDE 25MG TABLET", 
    "HYDROCHLOROTHIAZIDE 25MG TABLET", "HYDROCHLOROTHIAZIDE 25MG TABLET", 
    "HYDROCHLOROTHIAZIDE 25MG TABLET", "HYDROCHLOROTHIAZIDE 25MG TABLET", 
    "SODIUM BICARBONATE 500MG CAP", "SODIUM BICARBONATE 500MG CAP", 
    "SODIUM BICARBONATE 500MG CAP", "SODIUM BICARBONATE 500MG CAP", 
    "SODIUM BICARBONATE 500MG CAP", "SODIUM BICARBONATE 500MG CAP", 
    "SODIUM BICARBONATE 500MG CAP", "SODIUM BICARBONATE 500MG CAP", 
    "SODIUM BICARBONATE 500MG CAP", "SPIRONOLACTONE 25MG TABLET", 
    "SPIRONOLACTONE 25MG TABLET", "SPIRONOLACTONE 25MG TABLET", "SPIRONOLACTONE 25MG TABLET", 
    "SPIRONOLACTONE 25MG TABLET"), DRUG_STRENGTH_NO = c(0.25, 0.25, 
    0.25, 0.25, 0.25, 0.25, 0.25, 667, 667, 667, 667, 667, 667, 667, 
    667, 667, 667, 667, 1250, 1250, 1250, 1250, 1250, 1250, 1250, 
    625, 625, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 
    4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 200, 200, 
    200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 
    200, 200, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 
    40, 40, 40, 25, 25, 25, 25, 25, 500, 500, 500, 500, 500, 500, 
    500, 500, 500, 25, 25, 25, 25, 25), DRUG_STRENGTH_UNIT = c("MCG", 
    "MCG", "MCG", "MCG", "MCG", "MCG", "MCG", "MG", "MG", "MG", "MG", 
    "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", 
    "MG", "MG", "MG", "MG", "MG", "IU", "IU", "IU", "IU", "IU", "IU", 
    "IU", "IU", "IU", "IU", "IU", "IU", "IU", "IU", "IU", "IU", "IU", 
    "IU", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", 
    "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", 
    "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", 
    "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", 
    "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG", "MG"), INSTRUCTIONS = c("0.25MCG BIW NR", 
    "0.25mcg om", "0.25MCG tiw", "0.25MCG TIW NR", "1 BIW NR", "1 OM", 
    "1 TIW", "1 2T", "1 3T", "1 3T WITH MEALS", "1 BD", "1 TDS WITH MEALS", 
    "1TAB 3T", "1TAB 3T", "1TAB 3T", "1TAB 3T", "1TAB 3T", "2TAB 3T", 
    "1 3T", "1 3t nr", "1 3T WITH MEALS", "1 OM & 2 OA & 2 ON", "1.25G 3T", 
    "1.25G 3T", "1.25G 3T NR", "1250MG OM", "2 OM", "1", "1", "1", 
    "1", "1 BIW", "1 BIW NR", "1 EOW", "1 EOW", "1 oi", "1 once", 
    "1 TAD", "1 TAD", "4000UNIT BIW", "4000UNIT BIW NR", "4000UNIT EOW", 
    "4000UNIT OIW", "USE AS DIRECTED.", "(blank)", "1 2T", "1 2T", 
    "1 OM", "2 2T", "2 2T NR", "200MG 2T", "200MG 2T NR", "200MG BD", 
    "200MG OM", "200MG OM", "200MG TIW", "200MG TIW nr", "400MG 2T", 
    "400MG 2T", "400MG 2T NR", "400mg bd", "400MG OM", "1 2T", "1 OM", 
    "1 OM", "1 OM PRN", "1.5 2t", "2 2T", "20MG OM", "20MG OM", "20MG OM", 
    "40MG 2T", "40MG 2T", "40mg od", "40MG OM", "60MG 2T", "60mg bd", 
    "80mg 3t", ".5 om", "0.5 OM", "12.5MG 2T", "12.5MG OM", "12.5MG OM NR", 
    "1 2T", "1 3T", "1000MG 3T", "1G TDS", "2 3t", "500MG 2T", "500MG 2T NR", 
    "500MG 3T", "500mg tds", "1 OM", "1 OM NR", "12.5MG OM", "12.5MG OM", 
    "25MG OM"), EXPANDED_INSTRUCTIONS = c("TAKE ONE TABLET TWICE WEEKLY (NON-REFUNDABLE)", 
    "TAKE ONE TABLET EVERY MORNING", "TAKE ONE TABLET THREE TIMES WEEKLY", 
    "TAKE ONE TABLET THREE TIMES WEEKLY (NON-REFUNDABLE)", "TAKE ONE TABLET TWICE WEEKLY (NON-REFUNDABLE)", 
    "TAKE ONE TABLET EVERY MORNING", "TAKE ONE TABLET THREE TIMES WEEKLY", 
    "TAKE ONE TABLET 2 TIMES DAILY", "TAKE ONE TABLET 3 TIMES DAILY", 
    "TAKE ONE TABLET 3 TIMES DAILY WITH MEALS", "TAKE ONE TABLET TWICE DAILY", 
    "TAKE ONE TABLET THREE TIMES DAILY WITH MEALS", "TAKE ONE TABLET 3 TIMES DAILY", 
    "TAKE ONE TABLET 3 TIMES DAILY (WITH MEALS)", "TAKE ONE TABLET 3 TIMES DAILY on meals", 
    "TAKE ONE TABLET 3 TIMES DAILY WITH MEALS", "TAKE ONE TABLET 3 TIMES DAILY with meals, started on 16/4/2010", 
    "TAKE TWO TABLETS 3 TIMES DAILY", "TAKE ONE TABLET 3 TIMES DAILY", 
    "TAKE ONE TABLET 3 TIMES DAILY (NON-REFUNDABLE)", "TAKE ONE TABLET 3 TIMES DAILY WITH MEALS", 
    "TAKE ONE TABLET EVERY MORNING AND TWO TABLETS EVERY AFTERNOON AND TWO TABLETS EVERY NIGHT", 
    "TAKE ONE TABLET 3 TIMES DAILY", "TAKE ONE TABLET 3 TIMES DAILY WITH MEALS", 
    "TAKE ONE TABLET 3 TIMES DAILY (NON-REFUNDABLE)", "TAKE TWO TABLETS EVERY MORNING", 
    "TAKE TWO TABLETS EVERY MORNING", "INJECT CONTENTS OF ONE SYRINGE EVERY 10 DAYS", 
    "INJECT CONTENTS OF ONE SYRINGE EVERY 10 DAYS.", "INJECT CONTENTS OF ONE SYRINGE EVERY 2 WEEKS.", 
    "INJECT CONTENTS OF ONE SYRINGE EVERY TWO WEEKS", "INJECT CONTENTS OF ONE SYRINGE TWICE WEEKLY", 
    "INJECT CONTENTS OF ONE SYRINGE TWICE WEEKLY (NON-REFUNDABLE)", 
    "INJECT CONTENTS OF ONE SYRINGE EVERY FORTNIGHTLY", "INJECT ONE SYRINGE EVERY FORTNIGHTLY", 
    "INJECT CONTENTS OF ONE SYRINGE ONCE EVERY TEN DAYS", "INJECT CONTENTS OF ONE SYRINGE ONCE EVERY 10 DAYS", 
    "INJECT CONTENTS OF ONE SYRINGE AS DIRECTED (ONCE EVERY 10 DAYS)", 
    "INJECT CONTENTS OF ONE SYRINGE AS DIRECTED EVERY 10 DAYS", "INJECT CONTENTS OF ONE SYRINGE TWICE WEEKLY", 
    "INJECT CONTENTS OF ONE SYRINGE TWICE WEEKLY (NON-REFUNDABLE)", 
    "INJECT CONTENTS OF ONE SYRINGE EVERY FORTNIGHTLY", "INJECT CONTENTS OF ONE SYRINGE ONCE WEEKLY", 
    "USE AS DIRECTED. ONCE EVERY 10 DAYS", "INSERT ONE SYRINGE EVERY 10 DAYS AS DIRECTED", 
    "TAKE ONE TABLET 2 TIMES DAILY", "TAKE ONE TABLET 2 TIMES DAILY [STOP 2 WEEKS PRIOR TO COLONOSCOPY]", 
    "TAKE ONE TABLET EVERY MORNING", "TAKE TWO TABLETS 2 TIMES DAILY", 
    "TAKE TWO TABLETS 2 TIMES DAILY (NON-REFUNDABLE)", "TAKE ONE TABLET 2 TIMES DAILY", 
    "TAKE ONE TABLET 2 TIMES DAILY (NON-REFUNDABLE)", "TAKE ONE TABLET TWICE DAILY", 
    "TAKE ONE TABLET EVERY MORNING", "TAKE ONE TABLET EVERY MORNING REDUCE. STOP AT NEXT TCU", 
    "TAKE ONE TABLET THREE TIMES WEEKLY", "TAKE ONE TABLET THREE TIMES WEEKLY (NON-REFUNDABLE)", 
    "TAKE TWO TABLETS 2 TIMES DAILY", "TAKE TWO TABLETS 2 TIMES DAILY TAKE DAILY", 
    "TAKE TWO TABLETS 2 TIMES DAILY (NON-REFUNDABLE)", "TAKE TWO TABLETS TWICE DAILY", 
    "TAKE TWO TABLETS EVERY MORNING", "TAKE ONE TABLET 2 TIMES DAILY", 
    "TAKE ONE TABLET EVERY MORNING", "TAKE ONE TABLET EVERY MORNING prn for leg swelling", 
    "TAKE ONE TABLET EVERY MORNING WHEN NECESSARY FOR LEG SWELLING", 
    "TAKE ONE AND A HALF TABLETS 2 TIMES DAILY", "TAKE TWO TABLETS 2 TIMES DAILY", 
    "TAKE HALF TABLET EVERY MORNING", "TAKE HALF TABLET EVERY MORNING DOSE DECREASED ON 08 10 2015", 
    "TAKE HALF TABLET EVERY MORNING started 9/11/12 for edema/ Decreased enalapril 9/11/12", 
    "TAKE ONE TABLET 2 TIMES DAILY", "TAKE ONE TABLET 2 TIMES DAILY Dose decreased on 27/1/2010. stop span K", 
    "TAKE ONE TABLET ONCE DAILY", "TAKE ONE TABLET EVERY MORNING", 
    "TAKE ONE AND A HALF TABLETS 2 TIMES DAILY", "TAKE ONE AND A HALF TABLETS TWICE DAILY", 
    "TAKE TWO TABLETS 3 TIMES DAILY", "TAKE HALF TABLET EVERY MORNING", 
    "TAKE HALF TABLET EVERY MORNING", "TAKE HALF TABLET 2 TIMES DAILY", 
    "TAKE HALF TABLET EVERY MORNING", "TAKE HALF TABLET EVERY MORNING (NON-REFUNDABLE)", 
    "TAKE ONE CAPSULE 2 TIMES DAILY", "TAKE ONE CAPSULE 3 TIMES DAILY", 
    "TAKE TWO CAPSULES 3 TIMES DAILY", "TAKE TWO CAPSULES THREE TIMES DAILY", 
    "TAKE TWO CAPSULES 3 TIMES DAILY", "TAKE ONE CAPSULE 2 TIMES DAILY", 
    "TAKE ONE CAPSULE 2 TIMES DAILY (NON-REFUNDABLE)", "TAKE ONE CAPSULE 3 TIMES DAILY", 
    "TAKE ONE CAPSULE THREE TIMES DAILY", "TAKE ONE TABLET EVERY MORNING", 
    "TAKE ONE TABLET EVERY MORNING (NON-REFUNDABLE)", "TAKE HALF TABLET EVERY MORNING", 
    "TAKE HALF TABLET EVERY MORNING NEW, STARTED ON 20/4/15,", "TAKE ONE TABLET EVERY MORNING"
    ), TOTAL_DOSAGE = c(0.5, 0.25, 0.75, 0.75, 0.5, 0.25, 0.75, 1334, 
    2001, 2001, 1334, 2001, 2001, 2001, 2001, 2001, 2001, 4002, 3750, 
    3750, 3750, 6250, 3750, 3750, 3750, 1250, 1250, 12000, 12000, 
    4000, 4000, 8000, 8000, 4000, 4000, 12000, 12000, 12000, 12000, 
    8000, 8000, 4000, 4000, 4000, 12000, 400, 400, 200, 800, 800, 
    400, 400, 400, 200, 200, 600, 600, 800, 800, 800, 800, 400, 80, 
    40, 40, 40, 120, 160, 20, 20, 20, 80, 80, 40, 40, 120, 120, 240, 
    12.5, 12.5, 25, 12.5, 12.5, 1000, 1500, 3000, 3000, 3000, 1000, 
    1000, 1500, 1500, 25, 25, 12.5, 12.5, 25), PER = c("WEEK", "DAY", 
    "WEEK", "WEEK", "WEEK", "DAY", "WEEK", "DAY", "DAY", "DAY", "DAY", 
    "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", 
    "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "MONTH", "MONTH", 
    "FORTNIGHT", "FORTNIGHT", "WEEK", "WEEK", "FORTNIGHT", "FORTNIGHT", 
    "MONTH", "MONTH", "MONTH", "MONTH", "WEEK", "WEEK", "FORTNIGHT", 
    "WEEK", "MONTH", "MONTH", "DAY", "DAY", "DAY", "DAY", "DAY", 
    "DAY", "DAY", "DAY", "DAY", "DAY", "WEEK", "WEEK", "DAY", "DAY", 
    "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", 
    "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", 
    "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", 
    "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", "DAY", 
    "DAY", "DAY")), class = "data.frame", row.names = c(NA, -97L), .Names = c("DRUG_NAME", 
    "DRUG_STRENGTH_NO", "DRUG_STRENGTH_UNIT", "INSTRUCTIONS", "EXPANDED_INSTRUCTIONS", 
    "TOTAL_DOSAGE", "PER"))
    

    我已经编写了自己的代码,它对除第22和第44个观察值之外的所有行都有效:第22个观察值的患者在一天中的不同时间服用不同数量的药片,而第44个观察值的说明中写的是“ONCE”而不是“ONE SYRINGE”。

    library(dplyr)
    library(tidyr)
    library(stringr)
    library(lubridate)
    library(rebus)
    

    我将频率(freq)和药物数量(drug_form_qty)列为矢量:

    # Frequency phrases searched for in EXPANDED_INSTRUCTIONS. The order is kept
    # as-is because the vector is turned into a regex alternation downstream.
    freq <- c(
      # times of day
      "EVERY MORNING",
      "EVERY AFTERNOON",
      "EVERY NIGHT",
      # daily schedules
      "ONCE DAILY",
      "TWICE DAILY",
      "THRICE DAILY",
      "2 TIMES DAILY",
      "3 TIMES DAILY",
      "TWO TIMES DAILY",
      "THREE TIMES DAILY",
      # weekly schedules
      "ONCE WEEKLY",
      "TWICE WEEKLY",
      "TWO TIMES WEEKLY",
      "THREE TIMES WEEKLY",
      # fortnightly schedules
      "EVERY FORTNIGHTLY",
      "EVERY 2 WEEKS",
      "EVERY TWO WEEKS",
      # every ten days
      "EVERY TEN DAYS",
      "EVERY 10 DAYS"
    )

    # Quantity phrases: how many tablets / capsules / syringes make up one dose.
    drug_form_qty <- c(
      "HALF TABLET",
      "ONE TABLET",
      "ONE AND A HALF TABLETS",
      "TWO TABLETS",
      "ONE CAPSULE",
      "TWO CAPSULES",
      "ONE SYRINGE"
    )
    

    然后,我创建了两个新变量:DRUG_FORM_QTY 提取片剂、胶囊等的数量,FREQ 提取用药频率(例如每天早晨、每天两次、每10天等)。接下来,我将这两个变量转换为数字,得到 DRUG_FORM_QTY_NO 和 FREQ_NO。为了区分剂量是按每天、每周、每两周还是每月计算,我添加了新变量 PER 来保存此信息。TOTAL_DOSAGE 的计算方法是 DRUG_STRENGTH_NO * DRUG_FORM_QTY_NO * FREQ_NO。

    # Convert quantity phrases (e.g. "ONE AND A HALF TABLETS") into the number of
    # tablets/capsules/syringes per dose. "ONE AND A HALF" and "HALF" are tested
    # before "ONE" so the compound phrase is not read as 1 or 0.5.
    # Unrecognised or NA input yields NA.
    qty_to_number <- function(qty) {
      case_when(
        str_detect(qty, "ONE AND A HALF") ~ 1.5,
        str_detect(qty, "HALF") ~ 0.5,
        str_detect(qty, "ONE") ~ 1,
        str_detect(qty, "TWO") ~ 2
      )
    }

    # Convert frequency phrases into the number of doses per reporting period.
    # "EVERY 10 DAYS" counts as 3 because that schedule is reported per month.
    # It is tested first because those phrases also contain "EVERY".
    freq_to_number <- function(freq_text) {
      case_when(
        str_detect(freq_text, or1(c("TEN DAYS", "10 DAYS"))) ~ 3,
        str_detect(freq_text, or1(c("EVERY", "ONCE"))) ~ 1,
        str_detect(freq_text, or1(c("TWICE", "2 TIMES", "TWO TIMES"))) ~ 2,
        str_detect(freq_text, or1(c("THRICE", "3 TIMES", "THREE TIMES"))) ~ 3
      )
    }

    # Convert frequency phrases into the reporting period of the total dosage.
    # "TWO WEEKS" is listed next to "2 WEEKS": without it "EVERY TWO WEEKS"
    # matched no branch and PER came out as NA.
    freq_to_period <- function(freq_text) {
      day_words <- c("EVERY MORNING", "EVERY AFTERNOON", "EVERY NIGHT", "DAILY")
      fortnight_words <- c("FORTNIGHTLY", "2 WEEKS", "TWO WEEKS")
      case_when(
        str_detect(freq_text, or1(day_words)) ~ "DAY",
        str_detect(freq_text, "WEEKLY") ~ "WEEK",
        str_detect(freq_text, or1(fortnight_words)) ~ "FORTNIGHT",
        str_detect(freq_text, or1(c("TEN DAYS", "10 DAYS"))) ~ "MONTH"
      )
    }

    # Number of tablets/capsules/syringes taken per period for each instruction.
    #
    # All quantity and frequency phrases are extracted, not just the first one,
    # so split regimens such as "ONE TABLET EVERY MORNING AND TWO TABLETS EVERY
    # AFTERNOON AND TWO TABLETS EVERY NIGHT" are summed (1 + 2 + 2 = 5 per day).
    # The sum is only taken when every quantity has a matching frequency and all
    # parts share the same period; otherwise the first quantity/frequency pair is
    # used, which is what the single-match version of this code did.
    # Instructions lacking a recognised quantity or frequency phrase give NA.
    units_per_period <- function(instructions,
                                 qty_phrases = drug_form_qty,
                                 freq_phrases = freq) {
      qty_all <- str_extract_all(instructions, or1(qty_phrases))
      freq_all <- str_extract_all(instructions, or1(freq_phrases))
      vapply(seq_along(instructions), function(i) {
        qty <- qty_all[[i]]
        frq <- freq_all[[i]]
        if (length(qty) == 0 || length(frq) == 0) {
          return(NA_real_)
        }
        same_period <- length(unique(freq_to_period(frq))) == 1
        if (length(qty) == length(frq) && same_period) {
          sum(qty_to_number(qty) * freq_to_number(frq))
        } else {
          qty_to_number(qty[1]) * freq_to_number(frq[1])
        }
      }, numeric(1))
    }

    # DRUG_FORM_QTY / FREQ (and their *_NO counterparts) describe the first
    # quantity and frequency phrase found in each instruction; PER is the period
    # of that first frequency. TOTAL_DOSAGE is strength x units per period and
    # therefore also covers multi-part regimens.
    df_new <- df %>%
      mutate(
        DRUG_FORM_QTY = str_extract(EXPANDED_INSTRUCTIONS,
                                    pattern = or1(drug_form_qty)),
        FREQ = str_extract(EXPANDED_INSTRUCTIONS, pattern = or1(freq)),
        DRUG_FORM_QTY_NO = qty_to_number(DRUG_FORM_QTY),
        FREQ_NO = freq_to_number(FREQ),
        PER = freq_to_period(FREQ),
        TOTAL_DOSAGE = DRUG_STRENGTH_NO * units_per_period(EXPANDED_INSTRUCTIONS)
      )
    

    我想获得替代方案或有效方法来获得我想要的数据帧。谢谢。

0 个答案:

没有答案