2017-01-02 3 views
0

Ich versuche, eine Figur wie die folgenden von Tuominen-Soini et al. (2012) mit ggplot2 in R zu machen:Overlay Linien auf gestapeltes Balkendiagramm mit ggplot2 in R

Tuominen-Soini et al. (2012) plot

Ich habe eine data.frame, bars_df, mit vier Variablen (Daten am Ende der Frage):

> str(bars_df) 
'data.frame': 18 obs. of 4 variables: 
$ key : chr "time_2" "time_2" "time_2" "time_2" ... 
$ val : Factor w/ 6 levels "0","1","2","3",..: 1 2 3 4 5 6 1 2 3 4 ... 
$ sum : num 0 147 144 63 512 30 0 100 302 168 ... 
$ prop: num 0 0.164 0.161 0.07 0.571 0.033 0 0.098 0.297 0.165 ... 

bars_df Verwendung, I ein Balkendiagramm, machten die Verwendung von Folge ing:

library(ggplot2) 
ggplot(bars_df, aes(x = key, y = prop, fill = val)) + 
    geom_col(position = 'stack') 

bar chart test plot

Getrennt davon wird durch die Schaffung Häufigkeitstabellen für Verschiebungen Individuen von einem Code zu einem anderen (oder demselben Code) zwischen time_1 und time_2 und zwischen time_2 und time_3 und Beurteilung was verschiebt (shift_1: time_1 zu time_2; shift_2: time_1 zu time_2) waren häufiger als durch Zufall zu erwarten (bezeichnet mit +) und weniger wahrscheinlich als Chance (bezeichnet mit -): „Ich habe die folgenden data.frame (Daten wieder am Ende):

> str(lines_df) 
'data.frame': 72 obs. of 3 variables: 
$ code : chr "0-0" "0-1" "0-2" "0-3" ... 
$ shift: chr "shift_1" "shift_1" "shift_1" "shift_1" ... 
$ sig : chr "+" NA NA NA ... 

In der ersten Reihe, zum Beispiel "0-0" für eine Verschiebung (eigentlich keine Verschiebung) ab time_1 zu Time_1 time_2 . I'd like to add lines two different Linientypen , one each for + and -` Verschiebungen, wie in der ersten Abbildung ab Ove.

Während es Beispiele für questions demonstrating how to overlay a line on a bar chart gibt, sehe ich nicht, wie ich diese beiden data.frame s in diesem Fall kombinieren kann. Es scheint schwierig zu sein angesichts dieser spezifischen Konfiguration von Balken und Linien.

bars_df Daten:

bars_df <- structure(list(key = c("time_2", "time_2", "time_2", "time_2", 
"time_2", "time_2", "time_1", "time_1", "time_1", "time_1", "time_1", 
"time_1", "time_3", "time_3", "time_3", "time_3", "time_3", "time_3" 
), val = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 
5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("0", "1", "2", "3", 
"4", "5"), class = "factor"), sum = c(0, 147, 144, 63, 512, 30, 
0, 100, 302, 168, 412, 35, 0, 51, 56, 84, 252, 20), prop = c(0, 
0.164, 0.161, 0.07, 0.571, 0.033, 0, 0.098, 0.297, 0.165, 0.405, 
0.034, 0, 0.11, 0.121, 0.181, 0.544, 0.043)), .Names = c("key", 
"val", "sum", "prop"), row.names = c(NA, -18L), class = "data.frame") 

lines_df Daten:

lines_df <- structure(list(code = c("0-0", "0-1", "0-2", "0-3", "0-4", "0-5", 
"1-0", "1-1", "1-2", "1-3", "1-4", "1-5", "2-0", "2-1", "2-2", 
"2-3", "2-4", "2-5", "3-0", "3-1", "3-2", "3-3", "3-4", "3-5", 
"4-0", "4-1", "4-2", "4-3", "4-4", "4-5", "5-0", "5-1", "5-2", 
"5-3", "5-4", "5-5", "0-0", "0-1", "0-2", "0-3", "0-4", "0-5", 
"1-0", "1-1", "1-2", "1-3", "1-4", "1-5", "2-0", "2-1", "2-2", 
"2-3", "2-4", "2-5", "3-0", "3-1", "3-2", "3-3", "3-4", "3-5", 
"4-0", "4-1", "4-2", "4-3", "4-4", "4-5", "5-0", "5-1", "5-2", 
"5-3", "5-4", "5-5"), shift = c("shift_1", "shift_1", "shift_1", 
"shift_1", "shift_1", "shift_1", "shift_1", "shift_1", "shift_1", 
"shift_1", "shift_1", "shift_1", "shift_1", "shift_1", "shift_1", 
"shift_1", "shift_1", "shift_1", "shift_1", "shift_1", "shift_1", 
"shift_1", "shift_1", "shift_1", "shift_1", "shift_1", "shift_1", 
"shift_1", "shift_1", "shift_1", "shift_1", "shift_1", "shift_1", 
"shift_1", "shift_1", "shift_1", "shift_2", "shift_2", "shift_2", 
"shift_2", "shift_2", "shift_2", "shift_2", "shift_2", "shift_2", 
"shift_2", "shift_2", "shift_2", "shift_2", "shift_2", "shift_2", 
"shift_2", "shift_2", "shift_2", "shift_2", "shift_2", "shift_2", 
"shift_2", "shift_2", "shift_2", "shift_2", "shift_2", "shift_2", 
"shift_2", "shift_2", "shift_2", "shift_2", "shift_2", "shift_2", 
"shift_2", "shift_2", "shift_2"), sig = c("+", NA, NA, NA, NA, 
NA, NA, NA, "-", "-", NA, NA, NA, NA, "+", NA, NA, NA, NA, NA, 
NA, "+", "-", NA, NA, "-", NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, "+", NA, "+", "+", NA, NA, "-", 
"-", NA, NA, NA, NA, NA, NA, "+", NA, NA, NA, NA, NA, NA, "+", 
NA, NA, NA, NA, NA, NA, NA)), .Names = c("code", "shift", "sig" 
), row.names = c(NA, -72L), class = "data.frame") 

Antwort

1

Ich mag hässlich Code schreiben, wenn niemand zusieht.

library(dplyr) 
library(ggplot2) 

d <- arrange(bars_df, key, val) %>% 
    group_by(key) %>% 
    mutate(prop_start = lag(cumsum(prop)), prop_end = prop_start + prop, 
     midpoint = (prop_start + prop_end)/2, 
     next_key = paste("time", 1 + gsub("\\D", "", key) %>% 
          as.integer, sep = "_")) %>% 
    mutate(next_key = ifelse(next_key %in% unique(d$key), next_key, NA)) 

e <- select(d, key, midpoint) %>% 
    ungroup %>% 
    mutate(key = paste("time", -1 + gsub("\\D", "", key) %>% 
       as.integer, sep = "_")) %>% 
    rename(midpoint_end = midpoint) %>% 
    filter(key %in% unique(d$key)) 

e <- full_join(d, e) %>% 
    filter(!is.na(midpoint_end)) %>% 
    group_by(key, val) %>% 
    mutate(next_val = 1:n(), 
     code = paste(val, next_val, sep = "-")) %>% 
    left_join(lines_df) %>% 
    filter(!is.na(sig)) 

ggplot(d, 
     aes(x = key, xend = key, y = prop_start, yend = prop_end)) + 
    geom_segment(aes(color = val), size = 10) + 
    geom_segment(data = e, 
       aes(x = key, xend = next_key, 
        y = midpoint, yend = midpoint_end, 
        lty = sig), 
       arrow = arrow(length = unit(6, "pt"))) 

enter image description here

Verwandte Themen