pythonをつかって,XLSファイルをCSVに吐き出す
pyExceleratorをつかってXLSファイルをCSVファイルに吐き出すスクリプトをこちらでみつけたのだが,
日本語関連でうまく動かなかったので,ちょっと手直しして自分にあったものにしてみた.
#!/usr/bin/python # -*- coding: utf-8 """ xls2csv.py - xls to csv converter abstract licence """ __author__="yamaneko <yamaneko1212@gmail.com>" __date__ ="Sun Jan 31 20:45:30 2010" __version__="$Revision: 0 $" __credits__="" import sys, csv try: import pyExcelerator except: sys.stderr.write ('this script needs pyExcelerator\n') sys.exit (1) # count number def number_of_sheets (xls_file): try: return len (pyExcelerator.parse_xls( xls_file )) except: sys.stderr.write ('failed opening or parsing file ' + xls_file + '\n') sys.exit (1) def xls_to_csv (xls_file, csv_file='', delimiter='\t', sheet=0): # try to open and parse xls file try: xls_sheets = pyExcelerator.parse_xls( xls_file ) except: sys.stderr.write ('failed opening or parsing file ' + xls_file + '\n') sys.exit (1) # check sheet number if not (sheet < len (xls_sheets)): sys.stderr.write ('invalid sheet number ' + str (sheet) + '\n') sys.stderr.write (xls_file + ' has just ' + str (len (xls_sheets)) + ' sheets\n') sys.exit (1) # set dest file if csv_file == '': csv_file = str (xls_sheets[sheet][0]) + '.csv' #for sheet_id in xls_sheets: cell_dict = xls_sheets[sheet][1] # set limitter row_max and column_max for which in (0, 1): exec ("""%s = max([cell_dict.keys()[key_index][which]""" """ for key_index in xrange(len(cell_dict.keys()))] )""") % \ ["row_max", "column_max"][which] sys.stdout.write ('copying ' + xls_sheets[sheet][0] + ' of ' +\ xls_file + ' to ' + csv_file + '\n') # creating csv file. create_csv = csv.DictWriter(file(csv_file, "ab"), fieldnames=range(column_max+1), delimiter=delimiter) # copy data for row in xrange(row_max+1): create_csv.writerow(dict( enumerate([unicode (cell_dict.copy().get((row, column), '')).encode ('utf-8') for column in xrange(column_max+1)]))) sys.stdout.write ('finished writing to ' + csv_file + '\n') # for all def conv_all (src_file): for sheet_id in range (number_of_sheets (src_file)): xls_to_csv (src_file, sheet=sheet_id) sys.stdout.write(str (number_of_sheets (src_file)) + ' files are written\n') # usage def usage (): sys.stderr.write ('usage : python xls2csv.py src_xls_file\n') sys.stderr.write (' python xls2csv.py src_xls_file sheet_number\n') sys.stderr.write (' python xls2csv.py ' +\ 'src_xls_file dest_xls_file sheet_number\n') return if __name__ == "__main__": argc = len (sys.argv) if argc == 2: conv_all (sys.argv[1]) elif argc == 3: try: sheet = int (sys.argv[2]) except: sys.stderr.write ('invalid sheet number ' + str (sys.argv[2]) + '\n') usage () sys.exit (1) xls_to_csv (sys.argv[1], sheet=sheet) elif argc == 4: try: sheet = int (sys.argv[3]) except: sys.stderr.write ('invalid sheet number ' + str (sys.argv[2]) + '\n') usage () sys.exit (1) src_file = sys.argv[1] dest_file = sys.argv[2] if (src_file == dest_file): sys.stderr.write ('source file and destination file are same name\n') usage () sys.exit (1) xls_to_csv (src_file, csv_file=dest_file, sheet=sheet) else: usage () sys.exit (1) sys.exit (0)
使うのはpyExceleratorをインストールしてから,
$ python xls2csv.py source.xls
$ python xls2csv.py source.xls 3
$ python xls2csv.py source.xls destination.csv 3
みたいにどうぞ.