#!/usr/bin/python
# $Id: photoren,v 1.42 2007-07-01 02:27:04 neil Exp $

# Copyright 2007 Neil W. Van Dyke.  This program is Free Software; you can
# redistribute it and/or modify it under the terms of the GNU Lesser General
# Public License as published by the Free Software Foundation; either version
# 2.1 of the License, or (at your option) any later version.  This program is
# distributed in the hope that it will be useful, but without any warranty;
# without even the implied warranty of merchantability or fitness for a
# particular purpose.  See "http://www.gnu.org/copyleft/lesser.html" for
# details.  For other license options and consulting, contact the author.

# "photoren" is a small command-line program for renaming and organizing my
# digital photograph files based on the time they were taken.  It also renames
# any corresponding GQview metadata files, and can generate a shell script to
# reproduce the renaming and sorting on a backup copy of the files.
#
# I wrote "photoren" in 2007-06 to help tidy up my photo archive.  My archive
# was over 50GB, containing over 25,000 images and 16,000 metadata files.  (It
# would've been bigger, except I'd just culled around 5,000 images.)
# Incidentally, the initial run of "photoren" on this archive took 45 minutes.
#
# Execute "photoren --help" to see the command line syntax and options.
#
# I'm releasing "photoren" in case anyone else gets some use out of it.  Please
# note that I haven't put much time into quality assurance for this program,
# and that you use it at your own risk.  You probably shouldn't use this
# program unless you're comfortable with Unix administration and Python.  And
# I'll note here that "photoren" is the first chunk of Python code I've
# written...

# Note: In some cases, renames might be across filesystems.  Not all Un*x
# variants support this.

prog_name = "photoren"
prog_version = "0.1"
prog_copyright = "Copyright (c) 2007 Neil Van Dyke"
prog_homepage = "http://www.neilvandyke.org/photoren/"

#---------------------------------------------------------------------- Imports

# import pdb
import datetime
import optparse
import os
import os.path
import re
import socket
import stat
import string
import sys
import time

# The EXIF reader is a third-party package.  Defer the failure to main() so
# that "--help" and "--version" still work and the user gets a clear message
# instead of a traceback.
try:
    import mmpython.image
except ImportError:
    mmpython = None

#---------------------------------------------------------- Messages and Errors

def _emit(stream, text=""):
    """Write text followed by a newline to stream."""
    stream.write("%s\n" % text)

def debug_msg(message):
    """Write a debugging message to standard error."""
    _emit(sys.stderr, "*DEBUG* %s" % message)

def info_msg(message):
    """Write an informational message to standard error."""
    _emit(sys.stderr, message)

def warning_msg(message):
    """Write a warning message to standard error."""
    _emit(sys.stderr, "*WARNING* %s" % message)

def error_msg(message):
    """Write an error message to standard error."""
    _emit(sys.stderr, "*ERROR* %s" % message)

def fatal_error(message):
    """Write an error message to standard error and exit with status 1."""
    error_msg(message)
    sys.exit(1)

#---------------------------------------------------------------------- Globals

# These are set by main() from the command line and read by the functions
# below.
recursive = False
archive_dir = None
dryrun = False
dryrun_set = None        # Names "claimed" so far during a dry run, else None.
filenames = None
gqview_metadata = None   # Realpath of GQview metadata dir, or None if disabled.
script_file = None       # Open file object for the shell script, or None.

#---------------------------------------------- EXIF-Based File Name Generation

# Raw string, so that "\A", "\d" and "\Z" reach the regexp engine untouched.
baseFileNameRe = re.compile(
    r"\A *(\d{4}):(\d\d):(\d\d) +(\d\d):(\d\d):(\d\d) *\Z")

def baseFileNameForExifKey(tags, key):
    """Return a "YYYYMMDD-HHMMSS" string for the EXIF tag named key.

    tags is a mapping from EXIF tag name to an object with a "printable"
    attribute.  Returns None if the key is absent or if the printable value
    is not of the form "YYYY:MM:DD HH:MM:SS".
    """
    try:
        printable = tags[key].printable
    except KeyError:
        return None
    m = baseFileNameRe.match(printable)
    if not m:
        return None
    return "%s%s%s-%s%s%s" % m.groups()

def baseFileNameForFileExif(filename):
    """Return the "YYYYMMDD-HHMMSS" base name for an image file, or None.

    Prefers the "EXIF DateTimeOriginal" tag and falls back to
    "Image DateTime".  Returns None if the file cannot be read or if neither
    tag holds a usable timestamp.
    """
    try:
        f = open(filename, "rb")
    except IOError:
        return None
    # Nested try/finally (rather than "with") keeps this working on the
    # Python versions this program was originally written for.
    try:
        try:
            tags = mmpython.image.EXIF.process_file(f)
        except IOError:
            return None
    finally:
        # Always release the handle; an archive can hold tens of thousands
        # of files.
        f.close()
    return (baseFileNameForExifKey(tags, "EXIF DateTimeOriginal")
            or baseFileNameForExifKey(tags, "Image DateTime"))

#------------------------------------------ Canonicalizing File Name Extensions

file_ext_re = re.compile(r"\.[^.]+$")

ext_dict = {".jpg": ".jpg",
            ".jpeg": ".jpg"}

def canonicalized_filename_extension(filename):
    """Return the canonical extension for filename, or None.

    The existing extension is matched case-insensitively against ext_dict.
    None is returned if there is no extension or it is not recognized.
    """
    m = file_ext_re.search(filename)
    if not m:
        return None
    return ext_dict.get(m.group(0).lower())

#-------------------------------------------------------------- GQview Metadata

def gqview_metadata_path(pathname):
    """Return the GQview metadata file path for pathname.

    Returns None when GQview metadata handling is disabled (the global
    gqview_metadata is None), rather than formatting "None" into a path.
    """
    # Note: We require that gqview_metadata not have a trailing "/", and
    # guarantee this with "os.path.realpath" elsewhere in the code.
    #
    # TODO: This currently uses string append, and thus might work only with
    # Unix pathnames.
    if gqview_metadata is None:
        return None
    return "%s%s.meta" % (gqview_metadata, os.path.realpath(pathname))

#-------------------------------------------------------------- File Processing

def process_path(path):
    """Process one path: a regular file, or (if recursive) a directory.

    Anything that cannot be examined is reported and skipped, so that one
    bad entry does not abort the whole run.
    """
    try:
        mode = os.stat(path).st_mode
    except OSError:
        # E.g., nonexistent file or dangling symbolic link.
        error_msg("Could not stat %r." % path)
        return
    if stat.S_ISREG(mode):
        process_regular_file(path)
    elif stat.S_ISDIR(mode):
        if recursive:
            try:
                paths = os.listdir(path)
            except OSError:
                error_msg("Could not read directory %r." % path)
                return
            paths.sort()
            for f in paths:
                process_path(os.path.join(path, f))
        else:
            warning_msg("Skipping directory %r." % path)
    elif stat.S_ISLNK(mode):
        # NOTE(review): "os.stat" follows symbolic links, so this branch is
        # not reached; links are processed as whatever they point to.
        # Switching to "os.lstat" would make it live, but would also stop
        # processing symlinked files and directories -- confirm the intent
        # before changing.
        warning_msg("Skipping symbolic link %r." % path)
    else:
        warning_msg("Skipping strange file %r." % path)

def _shell_quote(text):
    """Return text quoted as a single POSIX shell word."""
    # Python's "%r" is not shell quoting: it can choose double quotes (inside
    # which the shell expands "$" and "`") and it uses Python escapes.
    return "'" + text.replace("'", "'\\''") + "'"

def _choose_new_filename(new_dir, timestamp_str, new_ext, old_realpath):
    """Return the first acceptable timestamp-based name under new_dir.

    Candidates are "TS.ext", "TS-2.ext", "TS-3.ext", ...  A candidate is
    acceptable if it is the file being processed itself (old_realpath), or
    if nothing exists there and no earlier dry-run rename has claimed it.
    """
    count = 1
    while True:
        if count == 1:
            base = timestamp_str + new_ext
        else:
            base = "%s-%d%s" % (timestamp_str, count, new_ext)
        candidate = os.path.join(new_dir, base)
        # The file's own name must not count as a collision; otherwise an
        # already-renamed "TS.ext" gets bumped to "TS-2.ext" on every run.
        if os.path.realpath(candidate) == old_realpath:
            return candidate
        claimed = dryrun_set is not None and candidate in dryrun_set
        if not (os.path.lexists(candidate) or claimed):
            return candidate
        count += 1

def process_regular_file(filename):
    """Rename (and possibly move) one image file based on its EXIF timestamp.

    Also renames the corresponding GQview metadata file, if that feature is
    enabled and the file exists, and appends a line to the shell script, if
    one is being written.  In dry-run mode nothing is changed on disk.
    """
    # info_msg("Processing %r..." % filename)

    new_ext = canonicalized_filename_extension(filename)
    if not new_ext:
        error_msg("Missing or unrecognized extension for file %r." % filename)
        return

    timestamp_str = baseFileNameForFileExif(filename)
    if not timestamp_str:
        error_msg("Could not find EXIF timestamp for file %r." % filename)
        return

    # TODO: Perhaps don't rename files that look like they've already been
    # renamed, so avoid, e.g., converting a "20070704-123456-2.jpg" to a
    # "20070704-123456.jpg" and risking confusion with the prior file.  A
    # command-line option could disable this protection.

    if archive_dir:
        # Archive layout is DIR/YYYY/MM/.
        new_dir = os.path.join(archive_dir,
                               timestamp_str[0:4],
                               timestamp_str[4:6])
        if not dryrun and not os.path.exists(new_dir):
            # Note: We *could* check here as to whether it's a directory.
            try:
                os.makedirs(new_dir)
            except OSError:
                error_msg("Could not create directory %r." % new_dir)
                return
    else:
        new_dir = os.path.dirname(filename)

    old_filename_realpath = os.path.realpath(filename)
    new_filename = _choose_new_filename(new_dir, timestamp_str, new_ext,
                                        old_filename_realpath)
    new_filename_realpath = os.path.realpath(new_filename)

    if old_filename_realpath == new_filename_realpath:
        # Already correctly named and placed.  There is nothing to rename
        # and nothing worth replaying on a backup copy.
        info_msg("Nothing to do for %r." % filename)
        return

    if dryrun:
        info_msg("Would rename %r to %r." % (filename, new_filename))
        dryrun_set.add(new_filename)
    else:
        info_msg("Renaming %r to %r..." % (filename, new_filename))
        try:
            os.rename(filename, new_filename)
        except OSError:
            error_msg("Could not rename %r to %r." % (filename, new_filename))
            return

    script_paths = [old_filename_realpath, new_filename_realpath]

    # Rename any existing GQview metadata file.
    if gqview_metadata is not None:
        old_gq = gqview_metadata_path(filename)
        new_gq = gqview_metadata_path(new_filename)
        if os.path.exists(old_gq):
            if dryrun:
                info_msg("Would rename %r to %r." % (old_gq, new_gq))
            else:
                info_msg("Renaming %r to %r..." % (old_gq, new_gq))
                try:
                    if archive_dir:
                        new_gq_dir = os.path.dirname(new_gq)
                        if not os.path.exists(new_gq_dir):
                            # Note: We *could* check here as to whether it's
                            # a directory.
                            os.makedirs(new_gq_dir)
                    os.rename(old_gq, new_gq)
                except OSError:
                    error_msg("Could not rename %r to %r." % (old_gq, new_gq))
        # The metadata pair goes into the script even if no metadata file
        # exists locally; the script's function tolerates a failed move.
        script_paths.extend([old_gq, new_gq])

    # TODO: Generalize the above two renaming chunks of code.  Note that in
    # the latter we don't add to dryrun_set and we don't return.

    # Add to the script.
    if script_file:
        _emit(script_file,
              "f " + " ".join([_shell_quote(p) for p in script_paths]))

#------------------------------------------------------------------------- Main

# Shell function written at the top of a generated script.  It takes pairs of
# (old, new) pathnames; pairs after the first are moved only if the first
# move succeeds.  "$2" is quoted inside the command substitution so that
# pathnames containing whitespace survive.
_SCRIPT_FUNCTION_LINES = [
    "f() {",
    "  mkdir -p \"$(dirname \"$2\")\"",
    "  if yes n | /bin/mv -i \"$1\" \"$2\" ; then",
    "    while [ $# -ge 4 ] ; do",
    "      shift 2",
    "      mkdir -p \"$(dirname \"$2\")\"",
    "      yes n | /bin/mv -i \"$1\" \"$2\" > /dev/null 2>&1",
    "    done",
    "  else",
    "    echo \"*WARNING* Could not move \\\"$1\\\" to \\\"$2\\\".\" >&2",
    "  fi",
    "}",
]

def main():
    """Parse the command line, process each path argument, and report."""
    start_time = datetime.datetime.now()

    # Parse command-line arguments.
    op = optparse.OptionParser(usage="usage: %s { OPTION }* { FILE }+"
                               % prog_name,
                               version="%s %s\n%s\n%s"
                               % (prog_name, prog_version,
                                  prog_copyright, prog_homepage))
    op.add_option("-n", "--dry-run",
                  action="store_true", dest="dryrun", default=False,
                  help="Do not rename or move any files, but say a close"
                  " approximation of what would be done.")
    op.add_option("-r", "--recursive",
                  action="store_true", dest="recursive", default=False,
                  help="Descend recursively into subdirectories, processing"
                  " the contents of each subdirectory.")
    # TODO: op.add_option("-a", "--archive-structure", year-slash-month
    op.add_option("-d", "--archive-directory",
                  action="store", type="string", dest="archive_dir",
                  metavar="DIR",
                  help="Move renamed files to archive directory DIR. Files"
                  " will be organized into a tree of subdirectories for year"
                  " and month. For example, if DIR is \"/foo\", then a file"
                  " named \"20070704-123456.jpg\" would be moved to under the"
                  " directory \"/foo/2007/07/\".")
    op.add_option("-q", "--gqview-metadata",
                  action="store", type="string", dest="gqview_metadata_dir",
                  metavar="DIR",
                  help="Also move GQview metadata files under directory DIR."
                  " Set DIR to empty string to disable this feature."
                  " Default: \".gqview/metadata\" under home directory")
    op.add_option("-s", "--script",
                  action="store", type="string", dest="script_path",
                  metavar="FILE",
                  help="Write a shell script to reproduce approximately the"
                  " actions of this program. This is useful for updating a"
                  " remote rsync backup, for example.")
    (options, filenames) = op.parse_args()

    global dryrun
    dryrun = options.dryrun
    if dryrun:
        global dryrun_set
        dryrun_set = set()

    global recursive
    recursive = options.recursive

    global gqview_metadata
    gqview_metadata = options.gqview_metadata_dir
    if gqview_metadata == "":
        # Explicitly disabled by the user.
        gqview_metadata = None
    else:
        if gqview_metadata is None:
            gqview_metadata = os.path.expanduser("~/.gqview/metadata")
        # TODO: Will "isdir" cause an automounted filesystem to be mounted if
        # necessary?
        if os.path.isdir(gqview_metadata):
            # Note: We're doing the "realpath" after checking for existence,
            # in case the filesystem needs mounting, but can only be mounted
            # through the original path, not the realpath.
            gqview_metadata = os.path.realpath(gqview_metadata)
        else:
            # A missing directory is fatal only if the user asked for it;
            # a missing default just disables the feature.
            if options.gqview_metadata_dir:
                fatal_error("Specified GQview metadata directory %r not found."
                            % gqview_metadata)
            gqview_metadata = None

    global archive_dir
    archive_dir = options.archive_dir
    # TODO: We probably need "stat" to follow any symlinks before we can test
    # for isdir.
    #
    # if archive_dir and not os.path.isdir(archive_dir):
    #     fatal_error("Specified archive directory %r not found." %
    #                 archive_dir)

    # TODO: archive_structure

    if len(filenames) == 0:
        fatal_error("No files specified.")

    if mmpython is None:
        fatal_error("The \"mmpython\" package, needed to read EXIF data,"
                    " could not be imported.")

    global script_file
    script_path = options.script_path
    if script_path:
        # TODO: Check that script file wouldn't overwrite an existing file,
        # using both exists and lexists.
        info_msg("Writing script file to %r." % script_path)
        try:
            script_file = open(script_path, "w")
            _emit(script_file, "#!/bin/sh")
            # Plain "%Y%m%d..." directives; the width-flag forms such as
            # "%4Y" are a C-library extension and not portable.
            _emit(script_file,
                  "# Generated by %s %s on host %r at %s."
                  % (prog_name, prog_version, socket.gethostname(),
                     time.strftime("%Y%m%dT%H%M%S %Z")))
            _emit(script_file)
            _emit(script_file,
                  "# Note: You might wish to modify this"
                  " function, such as to adjust pathnames.")
            for line in _SCRIPT_FUNCTION_LINES:
                _emit(script_file, line)
            _emit(script_file)
        except IOError:
            fatal_error("Could not open or write script file %r."
                        % script_path)

    # Process path arguments.
    for filename in filenames:
        process_path(filename)

    # Finish and close any script file.
    if script_file:
        try:
            _emit(script_file)
            _emit(script_file, "#EOF")
            script_file.close()
        except IOError:
            warning_msg("Could not finish writing script file %r."
                        % script_path)

    # Display done message.
    duration = datetime.datetime.now() - start_time
    # TODO: Do proper formatting of the duration.
    info_msg("Finished in %s days and %s seconds."
             % (duration.days, duration.seconds))

if __name__ == "__main__":
    main()

#------------------------------------------------------------------------------
#EOF