2016-07-22 12 views
2

Ich möchte ein paar hundert separate Datenbanken in r von MySQL erhalten. Ich kann sie getrennt bekommen, aber ich weiß nicht, wie man eine Schleife/Funktion macht/anwendet, um alle auf einmal zu bekommen.Wie bekomme ich alle mysql-Datenbanken in r

Hier ist, wie ich sie getrennt erfasse.

library(RMySQL) 
mydb = dbConnect(MySQL(), user='root', password='nelson', host='localhost', dbname="bookstore")                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
dub4_4_16 <- dbSendQuery(mydb, "select * from dub") 
dub4_4_16 = fetch(dub4_4_16, n=-1) 

reg4_4_16 <- dbSendQuery(mydb, "select * from all_sorted") 
reg4_4_16 = fetch(reg4_4_16, n=-1) 

dub4_5_16 <- dbSendQuery(mydb, "select * from dub4_5") 
dub4_5_16 = fetch(dub4_5_16, n=-1) 

Antwort

3

Ich verwende eine Liste von Funktionen, wie die unten Einsen (basierend auf Funktionen aus dem tcpl Paket genommen) in der RMySQL Umgebung zu arbeiten. Die Funktionen sind nur Wrapper zu DBI Anrufe. Dies ist eine sehr lange Antwort, aber der Kern des Ansatzes besteht darin, die Abfragezeichenfolgen zu erstellen, die über die Abfragen angewendet werden, um die Daten zu laden. Das Ergebnis (results) enthält eine Liste von data.table Objekten, die jede Tabelle aus jeder Datenbank in Ihrer Verbindung enthalten.

library(data.table) 
library(RMySQL) 

##----------------------------------------------------------------------------## 
## Accessory functions 
##----------------------------------------------------------------------------## 

getQuery <- function(query, db, user = "dayne", 
        pass = "password", host = "localhost") { 
    ## Change the default values as necessary, or just pass your values 
    ## each time the function is called. Check out the original to see 
    ## how it finds global variables set by the package. 

    dbcon <- dbConnect(drv = RMySQL::MySQL(), 
        user = user, 
        password = pass, 
        host = host, 
        dbname = db) 
    result <- dbGetQuery(dbcon, query) 
    dbDisconnect(dbcon) 
    result <- as.data.table(result) 
    result[] 

} 

sendQuery <- function(query, db = "", user = "dayne", 
        pass = "password", host = "localhost") { 

    dbcon <- dbConnect(drv = RMySQL::MySQL(), 
        user = user, 
        password = pass, 
        host = host, 
        dbname = db) 
    temp <- try(dbSendQuery(dbcon, query), silent = TRUE) 
    if (!is(temp, "try-error")) dbClearResult(temp) 
    dbDisconnect(dbcon) 

    if (!is(temp, "try-error")) return(TRUE) 

    temp[1] 

} 

appendTable <- function(dat, tbl, db, user = "dayne", 
         pass = "password", host = "localhost") { 

    dbcon <- dbConnect(drv = RMySQL::MySQL(), 
        user = user, 
        password = pass, 
        host = host, 
        dbname = db) 
    dbWriteTable(conn = dbcon, 
       name = tbl, 
       value = dat, 
       row.names = FALSE, 
       append = TRUE) 

    dbDisconnect(dbcon) 

    return(TRUE) 

} 

##----------------------------------------------------------------------------## 
## Create example data 
##----------------------------------------------------------------------------## 

listofdb <- c("db1", "db2", "db3") 
q1 <- paste0("CREATE DATABASE ", listofdb, ";") 
sapply(q1, sendQuery) 
listoftables <- paste0("tb", 1:5) 
q2fmt <- "CREATE TABLE %s (val DOUBLE)" 
q2 <- sprintf(q2fmt, listoftables) 
createtables <- function(db) { 
    sapply(q2, sendQuery, db = db) 
    sapply(listoftables, 
     appendTable, 
     db = db, 
     dat = data.table(val = rnorm(10))) 
} 
sapply(listofdb, create tables) 


##----------------------------------------------------------------------------## 
## Do the work 
##----------------------------------------------------------------------------## 

## Load all of the tables form different databases 
my_db_list <- getQuery("SHOW DATABASES;", db = "")$Database 
my_db_list 
# [1] "db1" "db2" "db3" 

table_list <- lapply(my_db_list, getQuery, query = "SHOW TABLES;") 
names(table_list) <- my_db_list 
table_list 
# $db1 
# Tables_in_db1 
# 1:   tb1 
# 2:   tb2 
# 3:   tb3 
# 4:   tb4 
# 5:   tb5 
# 
# $db2 
# Tables_in_db2 
# 1:   tb1 
# 2:   tb2 
# 3:   tb3 
# 4:   tb4 
# 5:   tb5 
# 
# $db3 
# Tables_in_db3 
# 1:   tb1 
# 2:   tb2 
# 3:   tb3 
# 4:   tb4 
# 5:   tb5 

## The db name to the table_list data.tables and collapse 
table_list <- lapply(names(table_list), 
        function(x) table_list[[x]][ , db := x]) 
table_list <- rbindlist(table_list) 
setnames(table_list, c("tbl", "db")) 

## Load all tables from all databases 
table_list[ , full_name := paste(db, tbl, sep = ".")] 
get_tables <-paste0("SELECT * FROM ", table_list$full_name, ";") 
results <- lapply(get_tables, getQuery, db = "") 
names(results) <- table_list$full_name 

results[["db1.tb5"]] 
#   val 
# 1: -0.09380952 
# 2: 0.81556657 
# 3: 1.18589086 
# 4: 0.19746379 
# 5: 0.91738280 
# 6: 1.30142674 
# 7: 1.42089957 
# 8: -0.16475130 
# 9: 0.40345353 
# 10: -1.31012033 

##----------------------------------------------------------------------------## 
## Remove example data 
##----------------------------------------------------------------------------## 

cleanup <- paste0("DROP DATABASE ", listofdb, ";") 
sapply(cleanup, sendQuery) 
Verwandte Themen