2016-08-25 4 views
1

Python Pandas mit sqlalchemy | Bulk Insert Fehler

Ich versuche, Millionen von Datensätzen aus einer CSV-Datei mit Python (Pandas und sqlalchemy) in eine MySQL-Datenbank einzufügen. Manchmal wird das Einfügen vor der Fertigstellung unterbrochen, oder es wird nicht einmal eine einzige Zeile in die Datenbank eingefügt.

mein Code:

import pandas as pd 
from sqlalchemy import create_engine 

# Load the whole CSV into a single DataFrame (no chunked reading).
df = pd.read_csv('/home/shankar/LAB/Python/Rough/*******.csv') 
# Keep cells that are not null; everywhere else (condition False) substitute
# the second argument, None — presumably so the driver sends SQL NULL.
df = df.where(pd.notnull(df), None) 
df.head() 
# NOTE(review): "[email protected]" looks like a scraper's e-mail obfuscation of
# the original "<password>@<host>" part of the URL — confirm the real value.
conn_str = "mysql+pymysql://root:[email protected]/MY_DB?charset=utf8&use_unicode=0" 
engine = create_engine(conn_str) 
# NOTE(review): this hands pandas a raw DBAPI (pymysql) connection instead of
# the SQLAlchemy engine. The traceback reported for this script shows pandas
# running "SELECT name FROM sqlite_master ... AND name=?" — an SQLite-style
# query with a `?` placeholder — which pymysql's `query % args` formatting
# rejects with "not all arguments converted during string formatting".
# Passing `con=engine` to `to_sql` is presumably the fix — TODO confirm
# against the pandas `to_sql` documentation.
conn = engine.raw_connection() 
# Append the frame's rows to the existing table (if_exists='append').
df.to_sql(name='table_name', con=conn, 
     if_exists='append') 
conn.close() 

Fehler:

--------------------------------------------------------------------------- 
TypeError         Traceback (most recent call last) 
/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs) 
1563    else: 
-> 1564     cur.execute(*args) 
1565    return cur 

/home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in execute(self, query, args) 
164 
--> 165   query = self.mogrify(query, args) 
166 

/home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in mogrify(self, query, args) 
143   if args is not None: 
--> 144    query = query % self._escape_args(args, conn) 
145 

TypeError: not all arguments converted during string formatting 

During handling of the above exception, another exception occurred: 

DatabaseError        Traceback (most recent call last) 
<ipython-input-6-bb91db9eb97e> in <module>() 
11 df.to_sql(name='company', con=conn, 
12   if_exists='append', 
---> 13   chunksize=10000) 
14 conn.close() 

/home/shankar/.local/lib/python3.5/site-packages/pandas/core/generic.py in to_sql(self, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype) 
1163   sql.to_sql(self, name, con, flavor=flavor, schema=schema, 
1164     if_exists=if_exists, index=index, index_label=index_label, 
-> 1165     chunksize=chunksize, dtype=dtype) 
1166 
1167  def to_pickle(self, path): 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in to_sql(frame, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype) 
569  pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index, 
570      index_label=index_label, schema=schema, 
--> 571      chunksize=chunksize, dtype=dtype) 
572 
573 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype) 
1659        if_exists=if_exists, index_label=index_label, 
1660        dtype=dtype) 
-> 1661   table.create() 
1662   table.insert(chunksize) 
1663 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in create(self) 
688 
689  def create(self): 
--> 690   if self.exists(): 
691    if self.if_exists == 'fail': 
692     raise ValueError("Table '%s' already exists." % self.name) 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in exists(self) 
676 
677  def exists(self): 
--> 678   return self.pd_sql.has_table(self.name, self.schema) 
679 
680  def sql_schema(self): 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in has_table(self, name, schema) 
1674   query = flavor_map.get(self.flavor) 
1675 
-> 1676   return len(self.execute(query, [name, ]).fetchall()) > 0 
1677 
1678  def get_table(self, table_name, schema=None): 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs) 
1574    ex = DatabaseError(
1575     "Execution failed on sql '%s': %s" % (args[0], exc)) 
-> 1576    raise_with_traceback(ex) 
1577 
1578  @staticmethod 

/home/shankar/.local/lib/python3.5/site-packages/pandas/compat/__init__.py in raise_with_traceback(exc, traceback) 
331   if traceback == Ellipsis: 
332    _, _, traceback = sys.exc_info() 
--> 333   raise exc.with_traceback(traceback) 
334 else: 
335  # this version of raise is a syntax error in Python 3 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs) 
1562     cur.execute(*args, **kwargs) 
1563    else: 
-> 1564     cur.execute(*args) 
1565    return cur 
1566   except Exception as exc: 

/home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in execute(self, query, args) 
163    pass 
164 
--> 165   query = self.mogrify(query, args) 
166 
167   result = self._query(query) 

/home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in mogrify(self, query, args) 
142 
143   if args is not None: 
--> 144    query = query % self._escape_args(args, conn) 
145 
146   return query 

    DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting 

Dieser Fehler tritt nur bei bestimmten CSV-Dateien auf. Bitte weisen Sie mich auf meinen Fehler hin!

Vielen Dank im Voraus.

Antwort

0

Der ausgelösten Fehlermeldung zufolge sind nicht alle Argumente der Abfrage Zeichenfolgen. Sie müssten daher jedes Element des DataFrames in eine Zeichenfolge umwandeln: `df = df.astype(str)`

+0

Ich erhalte den Fehler "name 'creaqte_engine' is not defined": --------------------------------------------------------------------------- NameError Traceback (most recent call last) in <module>() 7 df.head() 8 conn_str = "mysql+pymysql://root:root@localhost/globalTracker?charset=utf8&use_unicode=0" ----> 9 engine = creaqte_engine(conn_str) 10 conn = engine.raw_connection() 11 # df = df.astype(str) NameError: name 'creaqte_engine' is not defined –

+0

Einfach den Tippfehler korrigieren – das 'q' aus 'creaqte_engine' entfernen, sodass es 'create_engine' heißt – van