pythonをつかって,XLSファイルをCSVに吐き出す

pyExceleratorをつかってXLSファイルをCSVファイルに吐き出すスクリプトこちらでみつけたのだが,

日本語関連でうまく動かなかったので,ちょっと手直しして自分にあったものにしてみた.

#!/usr/bin/python
# -*- coding: utf-8
"""
xls2csv.py - xls to csv converter

abstract

licence

"""

__author__="yamaneko <yamaneko1212@gmail.com>"
__date__ ="Sun Jan 31 20:45:30 2010"
__version__="$Revision: 0 $"
__credits__=""

import sys, csv

try:
    import pyExcelerator
except:
    sys.stderr.write ('this script needs pyExcelerator\n')
    sys.exit (1)

# count number
def number_of_sheets (xls_file):
    try:
        return len (pyExcelerator.parse_xls( xls_file ))
    except:
        sys.stderr.write ('failed opening or parsing file ' + xls_file + '\n')
        sys.exit (1)


def xls_to_csv (xls_file, csv_file='', delimiter='\t', sheet=0):

    # try to open and parse xls file
    try:
        xls_sheets = pyExcelerator.parse_xls( xls_file )
    except:
        sys.stderr.write ('failed opening or parsing file ' + xls_file + '\n')
        sys.exit (1)

    # check sheet number
    if not (sheet < len (xls_sheets)):
        sys.stderr.write ('invalid sheet number ' + str (sheet) + '\n')
        sys.stderr.write (xls_file + ' has just ' + str (len (xls_sheets)) + ' sheets\n')
        sys.exit (1)

    # set dest file
    if csv_file == '':
        csv_file = str (xls_sheets[sheet][0]) + '.csv'
    
    #for sheet_id in xls_sheets:
    cell_dict = xls_sheets[sheet][1]

    # set limitter row_max and column_max
    for which in (0, 1):
        exec ("""%s = max([cell_dict.keys()[key_index][which]"""
              """ for key_index in xrange(len(cell_dict.keys()))] )""") % \
                ["row_max", "column_max"][which]


    sys.stdout.write ('copying ' + xls_sheets[sheet][0] + ' of ' +\
                      xls_file + ' to ' + csv_file + '\n')

    # creating csv file.
    create_csv = csv.DictWriter(file(csv_file, "ab"),
                                fieldnames=range(column_max+1),
                                delimiter=delimiter)

    # copy data
    for row in xrange(row_max+1):
        create_csv.writerow(dict(
            enumerate([unicode (cell_dict.copy().get((row, column), '')).encode ('utf-8')
                       for column in xrange(column_max+1)])))

    sys.stdout.write ('finished writing to ' + csv_file + '\n')

# for all
def conv_all (src_file):
    for sheet_id in range (number_of_sheets (src_file)):
        xls_to_csv (src_file, sheet=sheet_id)
    sys.stdout.write(str (number_of_sheets (src_file)) + ' files are written\n')

# usage
def usage ():
    sys.stderr.write ('usage : python xls2csv.py src_xls_file\n')
    sys.stderr.write ('        python xls2csv.py src_xls_file sheet_number\n')
    sys.stderr.write ('        python xls2csv.py ' +\
                      'src_xls_file dest_xls_file sheet_number\n')
    return

if __name__ == "__main__":
    
    argc = len (sys.argv)
    
    if argc == 2:
        conv_all (sys.argv[1])
        
    elif argc == 3:
        try:
            sheet = int (sys.argv[2])
        except:
            sys.stderr.write ('invalid sheet number ' + str (sys.argv[2]) + '\n')
            usage ()
            sys.exit (1)
        
        xls_to_csv (sys.argv[1], sheet=sheet)
        
    elif argc == 4:
        try:
            sheet = int (sys.argv[3])
        except:
            sys.stderr.write ('invalid sheet number ' + str (sys.argv[2]) + '\n')
            usage ()
            sys.exit (1)

        src_file = sys.argv[1]
        dest_file = sys.argv[2]

        if (src_file == dest_file):
            sys.stderr.write ('source file and destination file are same name\n')
            usage ()
            sys.exit (1)
            
        xls_to_csv (src_file, csv_file=dest_file, sheet=sheet)
        
    else:
        usage ()
        sys.exit (1)
        
    sys.exit (0)

使うのはpyExceleratorをインストールしてから,

$ python xls2csv.py source.xls
$ python xls2csv.py source.xls 3
$ python xls2csv.py source.xls destination.csv 3

みたいにどうぞ.