2017-06-08 1 views
1

Ich habe den folgenden Datenrahmen:
So trennen Sie die Zeichenfolge mit "." als Trennzeichen in einem Datenrahmen mit dplyr

# Reproducible example data (dput()-style literal).
# Builds a data.frame with a single factor column, `united_sample_names`.
# The factor stores the integer codes 92:101, which index into the `.Label`
# vector below, so only the last ten labels (Vehicle_iv ... KK_86 to KK_99)
# actually appear as values; the remaining labels are unused factor levels.
# `row.names = c(NA, 10L)` gives the data frame 10 automatically numbered rows.
# Each label has the form "<part1>.<part2>.<part3>", i.e. exactly two dots.
df <- structure(list(united_sample_names = structure(92:101, .Label = c("1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_208", 
"1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_209", "1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_210", 
"1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_211", "1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_212", 
"1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_213", "1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_214", 
"1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_215", "1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_216", 
"1-XX_iv.161202_NB501621_0042_AHV7NWBGXY.KK_218", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_172", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_173", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_174", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_175", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_176", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_177", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_178", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_179", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_180", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_181", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_182", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_183", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_184", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_185", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_186", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_187", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_188", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_189", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_190", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_191", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_192", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_193", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_194", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_195", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_196", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_197", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_198", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_199", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_200", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_201", "1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_202", 
"1-XX_iv.161205_NB501621_0043_AHVFM5BGXY.KK_203", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_359", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_360", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_361", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_362", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_363", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_364", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_365", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_366", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_367", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_368", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_369", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_370", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_371", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_372", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_373", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_374", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_375", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_376", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_377", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_378", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_379", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_381", "1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_382", 
"1-XX_iv.170125_NB501621_0067_AHW3MGBGXY.KK_383", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_100", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_101", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_102", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_103", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_104", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_106", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_107", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_109", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_110", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_111", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_112", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_113", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_114", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_115", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_116", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_117", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_118", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_119", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_120", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_122", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_124", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_125", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_126", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_127", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_128", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_86", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_87", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_88", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_89", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_90", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_92", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_93", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_95", 
"Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_97", "Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_99" 
), class = "factor")), .Names = "united_sample_names", row.names = c(NA, 
10L), class = "data.frame") 

# Auto-print the data frame at top level; the `#>` lines below are the
# recorded console output (one row per sample name, 10 rows in total).
df 
#>         united_sample_names 
#> 1 Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_86 
#> 2 Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_87 
#> 3 Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_88 
#> 4 Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_89 
#> 5 Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_90 
#> 6 Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_92 
#> 7 Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_93 
#> 8 Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_95 
#> 9 Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_97 
#> 10 Vehicle_iv.170414_NB501621_0118_AHLTYHBGX2.KK_99 

Ich möchte die Spalte wie folgt in drei Spalten aufteilen:

header1 header2       header3       
    Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_86 
    Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_87 
    Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_88 
    Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_89 
    Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_90 
    Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_92 
    Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_93 
    Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_95 
    Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_97 
    Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_99 

Ich habe Folgendes versucht, bin aber gescheitert:

> df %>% separate(united_sample_names, c("header1","header2","header3")) 
    header1 header2 header3 
1 Vehicle  iv 170414 
2 Vehicle  iv 170414 
3 Vehicle  iv 170414 
4 Vehicle  iv 170414 
5 Vehicle  iv 170414 
6 Vehicle  iv 170414 
7 Vehicle  iv 170414 
8 Vehicle  iv 170414 
9 Vehicle  iv 170414 
10 Vehicle  iv 170414 
Warning message: 
Too many values at 10 locations: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 

Was ist der richtige Weg?

+1

Sie sollten das Trennzeichen explizit angeben, nämlich '.'. Versuchen Sie `df %>% separate(united_sample_names, c("header1", "header2", "header3"), sep = "\\.")` –

Antwort

1

Wir müssen das Argument `sep` angeben. Andernfalls trennt `separate()` standardmäßig an jeder Folge nicht-alphanumerischer Zeichen – also auch an `_` – und teilt die Werte an den falschen Stellen auf.

library(tidyr)

# Split `united_sample_names` into three columns at each literal dot.
# `sep` is a regular expression: "[.]" is a character class matching only
# ".", so underscores and other punctuation inside the pieces stay intact.
separate(
  data = df,
  col = united_sample_names,
  into = c("header1", "header2", "header3"),
  sep = "[.]"
)
#  header1       header2 header3 
#1 Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_86 
#2 Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_87 
#3 Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_88 
#4 Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_89 
#5 Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_90 
#6 Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_92 
#7 Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_93 
#8 Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_95 
#9 Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_97 
#10 Vehicle_iv 170414_NB501621_0118_AHLTYHBGX2 KK_99 
Verwandte Themen