You can try strcapture() (from the utils package that ships with R)
with a regular expression:
# Sample input: one free-text column holding one or two IDs, a
# "LAST,FIRST" name and a trailing demographic string.
dat <- data.frame(
  A = c(
    "M24656811 M24677722 GREEN,SMITH34/M/B",
    "M24654999 DOE,JANE V37/F/W",
    "M24333107 DOE,JOHN24/M/B"
  )
)

# Split each string into four pieces with strcapture().
# Capture groups, in order:
#   1. an optional leading run of word characters (may be empty),
#   2. a token starting with "M2" followed by digits,
#   3. word characters, a comma, then characters that are neither digits
#      nor spaces,
#   4. whatever remains on the line.
pattern <- "^(\\w*) ?(M2\\d+) (\\w+,[^0-9 ]+) ?(.+)$"

# The zero-row prototype supplies the name and type of each output column.
result <- strcapture(
  pattern,
  dat$A,
  proto = data.frame(
    A = character(),
    B = character(),
    C = character(),
    D = character()
  )
)
result
You can also use str_match() from the stringr package
with the same pattern:
# Same extraction with stringr. str_match() returns a character matrix whose
# first column is the complete match and whose remaining columns are the
# capture groups. `dat` and `pattern` are the objects defined earlier in this
# file. The call is namespace-qualified so it works without library(stringr).
result <- stringr::str_match(dat$A, pattern)
# Drop the full-match column. drop = FALSE keeps the result a matrix even
# when there is only one input string; without it the single row collapses
# to a vector, as.data.frame() produces a 4 x 1 frame, and assigning four
# column names below fails.
result <- as.data.frame(result[, -1, drop = FALSE])
colnames(result) <- c("A", "B", "C", "D")
result