今天有一个需求,要把一个500W行的表传输到一个MySQL数据库中。有OGG和阿里楼大大的工具(见anysql.net)两种比较好的解决方法。这里我也简单得写了一小段python代码来实现这种简单的需求。
方法:Oracle -cx_Oracle lib-> Python -MySQLdb lib-> MySQL
常见类型对应如下:
number(12) – long – decimal(12)
date – datetime – datetime
varchar2(20) – str – varchar(20)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
#! /usr/bin/env python #coding=utf-8 import MySQLdb import logging import cx_Oracle import datetime def makeMyConn(MyConnInfo): global MyConnection try: MyConnection = MySQLdb.connect(MyConnInfo['server'], MyConnInfo['user'], MyConnInfo['passwd'],MyConnInfo['db'], MyConnInfo['port']) logging.debug('Mysql Connection ' + MyConnInfo['server'] + '/' + MyConnInfo['db'] + ' established.') return MyConnection.cursor() except MySQLdb.Error,e: print "Mysql Error %d: %s" % (e.args[0], e.args[1]) def makeOraConn(tns_str): global OraConnection OraConnection=cx_Oracle.Connection(tns_str) logging.debug('Oracle Connection ' + tns_str + 'established.') return OraConnection.cursor() def closeMyConn(): logging.debug('MySQL Connection closed.') MyConnection.close() def closeOraConn(): logging.debug('Oracle Connection closed.') OraConnection.close() def transfer(oracur, mycur, src_table_name, dest_table_name, commit_rows=100000): # for preformance concerns, modify the arraysize of cursor cache insert_sql = "insert into " + dest_table_name + " values (%s, %s, %s, %s, %s, %s) " format = "yyyy-MM-dd HH:mm:ss" oracur.arraysize = commit_rows oracur.execute('select * from ' + src_table_name) counter = 0 prior_counter = 0 skip_counter = 0 result = oracur.fetchmany() skip_row = False while result: for row in result: if row == None: break one = [] for i in range(0, 6): if row[i] == None: one.append( 'NULL' ) elif type(row[i]) == str : one.append( "'" + row[i] + "'") elif type(row[i]) == datetime.datetime : one.append( "'" + row[i].strftime("%Y-%m-%d %H:%M:%S") + "'") elif type(row[i]) == long: one.append( str(row[i]) ) else: skip_row=True logging.warn('Invalid datatype, skipping...' ) if skip_row: skip_row = False skip_counter = skip_counter + 1 continue mycur.execute( insert_sql % (one[0], one[1], one[2], one[3], one[4], one[5]) ) counter = counter + 1 MyConnection.commit() logging.debug( str(counter-prior_counter) + ' rows commited.' + str(skip_counter) + ' rows skipped') prior_counter = counter result = oracur.fetchmany() logging.debug('Totally' + str(counter) + ' rows fetched & inserted.') def main(): #variables logfile = 'D:/ora2my.log' src_table_name = 'user.user_info' desc_table_name = 'user_info' #initialize logger handler logging.basicConfig( level = logging.DEBUG, format = '%(asctime)s - %(levelname)s - %(message)s', datefmt = '%m-%d %H:%M', filename = logfile, filemode = 'w') # define a Handler which writes INFO messages or higher to the sys.stderr console = logging.StreamHandler() console.setLevel(logging.DEBUG) # set a format which is simpler for console use formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') # tell the handler to use this format console.setFormatter(formatter) logging.getLogger('').addHandler(console) # get the connection cursor OraConnInfo = {'tns_str':'eda/123456@testdb'} MyConnInfo = {'server':'172.19.1.1', 'db':'test', 'user':'user', 'passwd':'passwd', 'port':3306} mycur = makeMyConn(MyConnInfo) oracur = makeOraConn(OraConnInfo['tns_str']) transfer(oracur, mycur, src_table_name, desc_table_name) #close connections closeOraConn() closeMyConn() if __name__ == "__main__": main() |
coreldRAW本来就不大
我还以为要翻下来呢