#!/usr/bin/env python # # rdiff-backup -- Mirror files while keeping incremental changes # Version 0.1 released July 15, 2001 # Copyright (C) 2001 Ben Escoto # # This program is licensed under the GNU General Public License (GPL). # See http://www.gnu.org/copyleft/gpl.html for details. # # Please send me mail if/when you find bugs or have any suggestions. """ rdiff-backup -- Mirror files while keeping incremental changes Table of Contents Classes: RBException Logger BasicFileOperations Path PathTriple TimeRelated BackupPrepare Copier Incrementer RestorePrepare RestoreSequence Restorer Main Misc Functions Summary of execution: Main.Main() runs, parses the commandline stuff, etc., and then runs a copy of either BackupPrepare or RestorePrepare depending on the action taken. Incrementer/Copier or Restorer do most of the work. """ from __future__ import nested_scopes import os, stat, time, sys, shutil, tempfile, getopt, re, UserString class RBException(Exception): pass class Logger: """Manage output and status messages In general, a message will not be transmitted unless it's verbosity is less than or equal to the set verbosity. Each message also has an error level, which is 0 for normal messages, 1 for warnings, and 2 for errors. """ def __init__(self): """Logger initializer self.verbosity determines how much output should be written self.logfile is the filename of the log file to write to self.logfp is the python file object form of logfile """ self.verbosity = None self.logfile = None self.logfp = None def __del__(self): """Close logfile if still open""" if self.logfp: self.logfp.close() def Log(self, errstring, verbosity, errlevel = 0): """Main logging function - record errstring""" if verbosity <= self.verbosity: logstring = self.getprefix(errlevel) + errstring self.write_to_terminal(logstring, errlevel) if self.logfile: self.logfp.write(logstring + "\n") def FatalError(self, errstring, exitlevel): """Exit program with exitlevel because of fatal error""" self.Log(errstring, 1, 2) sys.exit(exitlevel) def open_logfile(self, logfile): """Initialize logfile""" self.logfp = open(str(logfile), "a") self.logfile = logfile def getprefix(self, errlevel): """Return string which precedes log entries""" if self.verbosity < 9: prefix = "" else: prefix = time.asctime(time.localtime(time.time())) + " " if errlevel == 1: prefix = prefix + "Warning: " elif errlevel == 2: prefix = prefix + "ERROR: " return prefix def write_to_terminal(self, string, errlevel): """Display message either on stdout or stderr""" if errlevel > 0: sys.stderr.write(string+"\n") else: sys.stdout.write(string+"\n") L = Logger() class BasicFileOperations: """Provides functions to copy, make a diff, etc. All of these should work with a Path object instead of a string when referring to filenames. """ def touch(self, target): """Make a file with no length""" open(str(target), "w").close() def rdiff(self, source, target, diff): """Produce a diff from source to target filenames called diff""" sigfile = tempfile.mktemp() try: self.run_external("rdiff signature %s %s" % (source, sigfile)) self.run_external("rdiff delta %s %s %s" % (sigfile, target, diff)) except RBException: try: os.unlink(sigfile) except: pass L.FatalError(sys.exc_info()[1], 8) try: os.unlink(sigfile) except os.error: L.Log("Unable to remove " + sigfile, 3, 1) def patch(self, patchee, diff): """Use diff to modify patchee""" L.Log("Patching file %s with %s" % (patchee, diff), 7) outfile = tempfile.mktemp() self.run_external("rdiff patch %s %s %s" % (patchee, diff, outfile)) patchee.delete() shutil.copy(outfile, str(patchee)) def mknodlike(self, source, target, type): """Use unix prog mknod and stat to make special file This depends on the commandline usage of mknod, and the format of the data returned by "stat -t". This at least works under linux, and hopefully other platforms too. """ L.Log("Using stat and mknod to make file %s. This may not work " "on all unix platforms." % target, 3, 1) statpipe = os.popen("stat -t %s" % source) statoutputlist = statpipe.read().split() statpipe.close() major, minor = map(lambda x: int(x, 16), (statoutputlist[9], statoutputlist[10])) self.run_external("mknod %s %s %d %d" % (target, type, major, minor)) def run_external(self, path): L.Log("Running: " + path, 8) if os.system(str(path)): raise RBException("Problem running " + path) BFO = BasicFileOperations() class Path(UserString.UserString): """Wrapper around the name of a file and its stats This class caches lstat calls (in self.statblock), the time in seconds the filename indicates (self.filetime), and directory listings (self.listdir). """ def __init__(self, path): """Path initializer - call base class, set statted to None""" UserString.UserString.__init__(self, path) self.statted = None self.filetime = None self.dirlisting = None def setpath(self, path): """Update self.data with path, clear statblock""" self.data = path self.clearstats() def clearstats(self): """Clear any cached stats. Use when file is changed""" if self.statted: self.statted = None self.statblock = None def normalize(self): """Set self.data to canonical version This just means that redundant /'s will be removed, including the trailing one, even for directories. ".." components will be retained. """ newpath = "/".join(filter(lambda x: x and x != ".", self.data.split("/"))) if self.startswith("/"): self.setpath("/" + newpath) else: self.setpath(newpath) def lstat(self, override = None): """Return output of os.lstat and save results""" if not self.statted or override: try: self.statblock = os.lstat(self.data) except os.error: self.statblock = None self.statted = 1 return self.statblock def isdir(self): """True if path is of a directory""" return self.lstat() and stat.S_ISDIR(self.lstat()[stat.ST_MODE]) def isreg(self): """True if path is of a regular file""" return self.lstat() and stat.S_ISREG(self.lstat()[stat.ST_MODE]) def issym(self): """True if path is of a symlink""" return self.lstat() and stat.S_ISLNK(self.lstat()[stat.ST_MODE]) def type(self): """Return string used to indicate type of file Currently we are using None if the file doesn't exist, "dir" for a directory, "sym" for a symlink, "reg" for a regular file, and "other" for everything else. """ if not self.lstat(): return None elif self.isreg(): return "reg" elif self.isdir(): return "dir" elif self.issym(): return "sym" else: return "other" def ctime(self): """Return last time (in seconds) of file change""" return max(self.lstat()[stat.ST_CTIME], self.lstat()[stat.ST_MTIME]) def listdir(self): """Cached version of os.listdir(self). Use only on directories""" if self.dirlisting is None: self.dirlisting = map(Path, os.listdir(self.data)) return self.dirlisting def dirsplit(self): """Returns a tuple (dirname, basename) Basename is never '' unless self is root, so it is unlike os.path.basename. If path is just above root (so dirname is root), then dirname is ''. In all other cases dirname is not the empty string. Also, dirsplit depends on the format of self, so basename could be ".." and dirname could be a subdirectory. For an atomic relative path, dirname will be '.'. """ self.normalize() if self.data.find("/") == -1: return (Path("."), self) comps = self.data.split("/") return (Path("/".join(comps[:-1])), Path(comps[-1])) def append(self, newpath): """Create new Path by adding on new path""" result = self + "/" + newpath result.normalize() return result def delete(self): """Remove file (and any subdirectories if dir)""" if self.isdir(): shutil.rmtree(self.data) else: os.unlink(self.data) self.clearstats() def shallowcopy(self, target): """Turn target path into a copy of self, including attributes Does not recurse down directories. So if target and self are both directories, the only effect will be the target's attributes being changed. """ if target.lstat() and not (target.isdir() and self.isdir()): target.delete() if not target.lstat(): if self.isreg(): shutil.copyfile(self.data, target.data) else: self.createfilelike(target) if target.lstat(1): target.updateattribs(self) def deepcopy(self, target): """Like shallowcopy, but recur if directory""" L.Log("Deep copying %s to %s" % (self, target), 8) self.shallowcopy(target) if self.isdir(): for entry in self.listdir(): (self + "/" + entry).deepcopy(target + "/" + entry) def updateattribs(self, sourcepath): """Change file attributes of self to match source Only changes the chmoddable bits, uid/gid ownership, and timestamps, so self must already exist and be of right type. """ s = sourcepath.lstat() if not s: raise RBException("Trying to copy non-existent file %s" % sourcepath) elif sourcepath.issym(): return # symlinks have no valid attributes os.chmod(self.data, stat.S_IMODE(s[stat.ST_MODE])) os.chown(self.data, s[stat.ST_UID], s[stat.ST_GID]) os.utime(self.data, (s[stat.ST_ATIME], s[stat.ST_MTIME])) self.clearstats() def createfilelike(self, targetpath): """Create new file targetpath of same type as self Returns true if some change is made (always except for sockets, which are ignored). """ if targetpath.lstat(): raise RBException("File %s already exists" % targetpath) mode = self.lstat()[stat.ST_MODE] if self.isdir(): targetpath.mkdir() elif self.isreg(): BFO.touch(targetpath) elif self.issym(): os.symlink(os.readlink(self.data), targetpath.data) elif stat.S_ISCHR(mode): BFO.mknodlike(self, targetpath, "c") elif stat.S_ISBLK(mode): BFO.mknodlike(self, targetpath, "b") elif stat.S_ISFIFO(mode): os.mkfifo(targetpath.data) elif stat.S_ISSOCK(mode): L.Log("Ignoring socket %s" % self, 3, 1) return None else: raise RBException("Unknown file type for %s" % self) targetpath.clearstats() return 1 def mkdir(self): """Like os.mkdir(self) but does string conversion""" os.mkdir(self.data) self.clearstats() def isincfile(self): """Return true if extension makes self look like incfile""" basecomps = self.dirsplit()[1].split(".") if len(basecomps) < 3: return None type = basecomps[-1] if (type != "snapshot" and type != "diff" and type != "missing" and type != "dir"): return None try: self.inc_time() except ValueError: return None return 1 def inc_time(self): """Return time in seconds indicated by inc file's filename This, like the other inc_ methods, should only be used if a filename passes self.isincfile(). """ if not self.filetime: timestr = self.split(".")[-2] date, timeofday = timestr.split("_") year, month, day = date.split("-") hour, minute, second = timeofday.split(":") self.filetime = time.mktime(map(int, (year, month, day, hour, minute, second, -1, -1, -1))) return self.filetime def inc_type(self): """Return the inctype of an incfile Currently this is just the final extension of the file, so "dir", "missing", "snapshot", or "diff". """ return self.split(".")[-1] def inc_basename(self): """Return name of incfile minus extension""" return Path(".".join(self.split(".")[:-2])) def inc_addextension(self, timestring, type): """Return new path of incfile with appropriate timestring and type""" return Path(".".join([self.data, timestring, type])) class PathTriple: """Hold six paths. They are newdir, newbase, olddir, oldbase, incdir, and incbase, where for each dir is like "/a/b" and base is like "c" so when combined as in newfull, oldfull, and incfull, they make "/a/b/c". new, old, and inc are used for the source dir, the dir that becomes a mirror, and the dir that holds the increments. For old and new base is the same of the actual files expected, while for inc, most of the files dealt with will have an extension tacked on to base first. """ def __init__(self, newdir, newbase, olddir, oldbase, incdir, incbase): self.newdir, self.newbase = newdir, newbase self.newfull = newdir.append(newbase) self.olddir, self.oldbase = olddir, oldbase self.oldfull = olddir.append(oldbase) self.incdir, self.incbase = incdir, incbase self.incfull = incdir.append(incbase) def append_base(self, basename): """Return new PathTriple moving fulls to dirs and add new base""" return PathTriple(self.newfull, basename, self.oldfull, basename, self.incfull, basename) def __str__(self): """used for logging""" return ("newfull = %s\noldfull = %s\nincfull = %s" % (self.newfull, self.oldfull, self.incfull)) class TimeRelated: """Functions and data which depend on the time""" def __init__(self, timeinseconds): """Initializer - set time self.previnctime is the time in seconds of the previous backup self.timestr is a user readable and sortable version of time """ self.time = timeinseconds self.previnctime = None self.timestr = self.timetostring(self.time) def setprevinctime(self, previnctime): """Set previnctime and prevtimestr from time in seconds""" self.previnctime = previnctime self.prevtimestr = self.timetostring(previnctime) def getfullname(self, prefix, type): """Return the full modified name of an update file""" return "%s.%s.%s" % (prefix, self.prevtimestr, type) def timetostring(self, timeinseconds): """Makes a user readable version of time in seconds""" return time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime(timeinseconds)) def recentlychanged(self, statblock): """Return true if file has changed since previous backup""" return (statblock[stat.ST_CTIME] > self.previnctime or statblock[stat.ST_MTIME] > self.previnctime) def recent_extension(self, filename): """Return true for files with recent extension True if time extension of filename indicates is newer than or equally as old as self.time. """ return stringtotime(string.split(filename, ".")[-2]) >= self.time class BackupPrepare: """Prepare for a Backup operation Most of the class variables are taken from the Main class, or the Incrementer and Restorer classes and mean the same thing. self.marker_time is used by self.get_marker_time and caches the time of last backup, and self.marker is the filename of the marker. """ def __init__(self, args, exclude, exclude_mirror): self.source, self.target = map(Path, args) self.exclude = exclude self.exclude_mirror = exclude_mirror self.rbdir = None self.TR = None self.marker_time = None self.marker = None def Start(self): """Start backing up""" L.Log("Starting backup", 6) self.check_arguments() self.prepare_rbdir() L.open_logfile(self.rbdir + "/backup.log") self.TR = TimeRelated(time.time()) self.add_misc_excludes() if self.get_marker_time(): self.incremental_backup() else: self.mirror_only() self.reset_time_marker() def check_arguments(self): """Make sure initial arguments are good Both source and target must be names of directories. Abort if target already exists but isn't a directory. """ self.source.normalize() self.target.normalize() if not self.source.isdir(): L.FatalError("Currently the head of the backup tree " "must be a directory. Sorry.", 4) if not self.target.lstat(): L.Log("Creating directory %s" % self.target, 4) self.target.mkdir() elif not self.target.isdir(): L.FatalError("Destination %s exists but is not a directory" % self.target, 5) def prepare_rbdir(self): """Create rdiff-backup-data dir if does not exist""" def rbdirerror(): L.FatalError("rdiff backup directory %s exists and is not a " "directory, or could not be created", 6) self.rbdir = self.target + "/rdiff-backup-data" if not self.rbdir.lstat(): try: self.rbdir.mkdir() except os.error: rbdirerror() elif not self.rbdir.isdir(): rbdirerror() def add_misc_excludes(self): """Add commonsense excludes""" self.exclude_mirror.append(str(self.rbdir)) self.exclude.append("/proc") def reset_time_marker(self): """Create file marking time of last backup, delete old one""" basename = self.rbdir + "/current_mirror" BFO.touch(basename.inc_addextension(self.TR.timestr, "snapshot")) if self.marker: self.marker.delete() def get_marker_time(self): """Return time in seconds of marker, or 0 if none exists""" if self.marker_time is None: l = filter(lambda x: x.startswith("current_mirror"), self.rbdir.listdir()) if not l: self.marker_time = 0 elif len(l) > 1: L.FatalError("Two time marker (current_mirror.---) files " "found. Please delete all but one.", 8) else: self.marker = self.rbdir + "/" + l[0] self.marker_time = self.marker.inc_time() return self.marker_time def incremental_backup(self): """Do incremental backup from self.source to self.target""" L.Log("Starting incremental backup " + self.TR.timestr, 3) self.TR.setprevinctime(self.get_marker_time()) newcomps, oldcomps = self.source.dirsplit(), self.target.dirsplit() pt = PathTriple(newcomps[0], newcomps[1], oldcomps[0], oldcomps[1], self.rbdir, "increments") Incrementer(self.TR, self.exclude, self.exclude_mirror).Increment(pt) L.Log("Finished incremental backup " + self.TR.timestr, 3) def mirror_only(self): """Mirror source to target, no incrementing""" L.Log("Copying %s to %s for initial mirror" % (self.source, self.target), 3) Copier(self.exclude, self.exclude_mirror).Copy(self.source, self.target) class Copier: """Copy files around This is a class so we can use excludes and other class variables to direct and accumulate data about file operations. self.exclude and self.exclude_mirror are lists of regular expressions indicating which files from the new and mirror directory to exclude. exclude_regexps and exclude_mirror_regexps are compiled versions of these. """ def __init__(self, exclude, exclude_mirror): self.exclude = exclude self.exclude_regexps = map(re.compile, exclude) self.exclude_mirror = exclude_mirror self.exclude_mirror_regexps = map(re.compile, exclude_mirror) L.Log("Excludes: " + str(self.exclude), 7) L.Log("Mirror excludes: " + str(self.exclude_mirror), 7) def Copy(self, sourcepath, targetpath): """Recursively copy sourcepath to targetpath""" L.Log("Deep copying %s to %s" % (sourcepath, targetpath), 8) if (self.isexcluded(sourcepath, self.exclude_regexps) or self.isexcluded(targetpath, self.exclude_mirror_regexps)): L.Log("Excluding file %s" % sourcepath, 5) return sourcepath.shallowcopy(targetpath) if sourcepath.isdir(): for entry in sourcepath.listdir(): self.Copy(sourcepath.append(entry), targetpath.append(entry)) def isexcluded(self, filename, exclude_regexps): """Return true if filename matches exclude list""" for regexp in exclude_regexps: m = regexp.match(str(filename)) if m and m.group() == filename: return 1 return None class Incrementer(Copier): """Namespace and data used by Increment and related functions This class uses the following variables: self.TR, a TimeRelated, holding the time of this backup and the the previous one. self.instructions, which tells how each file is to be backed up. The function indicated by the dictionary is run with a PathTriple as its argument. If the type isn't in the dictionary, use "other" as the default. """ def __init__(self, TR, exclude, exclude_mirror): Copier.__init__(self, exclude, exclude_mirror) self.TR = TR self.instructions = { "reg": { "reg": self.diff, None: self.missing, "other": self.snapshot }, "dir": { "dir": self.dir_increment, None: self.missing, "other": self.snapshot }, None: { "dir": self.dir_increment, "other": self.snapshot }, "other": { None: self.missing, "other": self.snapshot }} def instructions_get(self, newfiletype, oldfiletype): """Lookup up pair in instructions""" if self.instructions.has_key(newfiletype): d = self.instructions[newfiletype] else: d = self.instructions["other"] if d.has_key(oldfiletype): return d[oldfiletype] else: return d["other"] def Increment(self, pt): """Update new to old and old to inc in pathtriple pt Increment returns true if action was taken or false if nothing was to be done for the files. This is so directories can be flagged if their subdirectories change. """ L.Log("Increment on %s" % pt.newfull, 7) L.Log("pathtriple:\n%s" % pt, 8) if (self.isexcluded(pt.newfull, self.exclude_regexps) or self.isexcluded(pt.oldfull, self.exclude_mirror_regexps)): L.Log("Excluding file %s" % pt.newfull, 5) return None if self.shortcut_by_time(pt): return None changed = self.instructions_get(pt.newfull.type(), pt.oldfull.type())(pt) if changed: L.Log("Updating file %s" % pt.oldfull, 7) if pt.newfull.lstat(): pt.newfull.shallowcopy(pt.oldfull) else: pt.oldfull.delete() return changed def shortcut_by_time(self, pt): """Return true if possible to skip over file by looking at time""" return (pt.newfull.lstat() and not pt.newfull.isdir() and pt.newfull.ctime() < self.TR.previnctime) def diff(self, pt): """Create increment diff, update old""" diffname = self.getincname(pt.incfull, "diff") BFO.rdiff(pt.newfull, pt.oldfull, diffname) diffname.updateattribs(pt.oldfull) return 1 def missing(self, pt): """Create increment to note that pt.old was missing""" BFO.touch(self.getincname(pt.incfull, "missing")) return 1 def snapshot(self, pt): """Copy old to inc This is used when the file changes types, the file is no longer in the old directory, or for some other reason a diff cannot be made. """ pt.oldfull.shallowcopy(self.getincname(pt.incfull, "snapshot")) return 1 def dir_increment(self, pt): """Increment directories - called when pt.old or pt.new is a dir""" if pt.newfull.isdir(): new_filelist = pt.newfull.listdir() else: new_filelist = [] if pt.oldfull.isdir(): old_filelist = pt.oldfull.listdir() else: old_filelist = [] if not pt.incfull.lstat(): pt.incfull.mkdir() changed = mapor(lambda fn: self.Increment(pt.append_base(fn)), listunion(new_filelist, old_filelist)) if changed: incpath = self.getincname(pt.incfull, "dir") BFO.touch(incpath) incpath.updateattribs(pt.oldfull) return changed def getincname(self, basepath, type): """Return name of new incfile based basepath with type type""" return basepath.inc_addextension(self.TR.prevtimestr, type) class RestorePrepare: """Prepare for Restore operation""" def __init__(self, args, exclude): self.args = args self.exclude = exclude self.oldfile = None def Start(self): """Start restoring""" L.Log("Starting restore", 6) self.set_incfile_and_target() TR = TimeRelated(self.incfile.inc_time()) self.exclude.append(".*/rdiff-backup-data") self.set_rbdir_and_oldfile() L.open_logfile(self.rbdir + "/restore.log") L.Log("Restoring %s to %s, going back to dataset %s." % (self.incfile, self.target, TR.timestr), 4) L.Log("Current time: %s" % time.asctime(), 4) newcomps = self.target.dirsplit() oldcomps = self.oldfile.dirsplit() inccomps = self.incfile.inc_basename().dirsplit() pt = PathTriple(newcomps[0], newcomps[1], oldcomps[0], oldcomps[1], inccomps[0], inccomps[1]) Restorer(TR, self.exclude).Restore(pt) L.Log("Finished Restore", 6) def set_incfile_and_target(self): """Make sure args are good and init incfile and target""" self.incfile = Path(self.args[0]) if len(self.args) == 2: self.target = Path(self.args[1]) else: self.target = self.incfile.inc_basename() self.incfile.normalize() self.target.normalize() if not self.incfile.isincfile(): L.FatalError("%s does not look like an increment file" % self.incfile, 3) if self.target.lstat(): L.FatalError("File %s seems to exist already. Termining " "restore. Use rdiff-backup\nwith two arguments " "to specify the restore target, as in:\n" "'rdiff-backup backupfile target'" % self.target, 7) def set_rbdir_and_oldfile(self): """Set self.rbdir, the backup data dir, and oldfile The idea here is to keep backing up on the path until we find something named "rdiff-backup-data". Then use that as a reference to calculate the oldfile. This could fail if the increment file is pointed to in a funny way, using symlinks or somesuch. """ fullincpath = Path(os.path.join(os.getcwd(), self.incfile)) dir = fullincpath while 1: dir, base = dir.dirsplit() if base == "rdiff-backup-data": break elif not dir or dir== ".": L.FatalError("Unable to find rdiff-backup-data directory", 9) self.rbdir = dir + "/" + base L.Log("Set rbdir to %s" % self.rbdir, 8) extension = fullincpath[len(self.rbdir + "/increments"):] self.oldfile = (dir + extension).inc_basename() L.Log("Set oldfile to %s" % self.oldfile, 8) class RestoreSequence: """Hold instructions on how to restore a file The main data structure here is a sequence of triples stored in self.seq. Each triple is (time in seconds, inctype, path) of an increment file. To use, set the mirror file name and target name with __init__, then call self.add for each relevant increment file. Then call restore and poof the target file will be made. """ def __init__(self, mirror_file, target): """RestoreSequence initializer""" self.mirror_file = mirror_file self.target = target self.seq = [] def path_to_tuple(self, path): """Given path return matching sequence element""" return (path.inc_time(), path.inc_type(), path) def append(self, path): """Add path (filename of increment file) to the sequence""" self.seq.append(self.path_to_tuple(path)) def set_seq_from_filelist(self, filelist): """Replace sequence using new list of filenames""" self.seq = map(self.path_to_tuple, filelist) def normalize(self): """Remove unnecessary elements in the sequence It turns out that, except in the case of diff increments, only the last increment will be necessary. The method here uses the fact that sorting self.seq will produce the increments in chronological order. Sequence ends up in reverse chronological order. """ self.seq.sort() for index in range(len(self.seq)): if self.seq[index][1] != "diff": self.seq = self.seq[:index + 1] break self.seq.reverse() def restore(self): """Create the file indicated by the sequence. Do not recur.""" self.normalize() L.Log("Restore Sequence = %s" % self.seq, 8) if not self.seq: self.copy_from_mirror() for entry in self.seq: if entry[1] == "missing": L.Log("%s means that file did not exist then." % entry[2], 7) elif entry[1] == "dir": self.dir(entry[2]) elif entry[1] == "snapshot": self.snapshot(entry[2]) elif entry[1] == "diff": self.diff(entry[2]) else: raise RBException("Unknown increment type " + entry[2]) def copy_from_mirror(self): """Copy over current version from mirror directory""" self.mirror_file.shallowcopy(self.target) self.target.clearstats() def snapshot(self, incfile): """Process snapshot increment""" L.Log("Restoring by copying %s to %s" % (incfile, self.target), 7) incfile.shallowcopy(self.target) self.target.clearstats() def dir(self, incfile): """Process dir increment""" L.Log("Creating directory %s" % self.target, 7) self.target.mkdir() self.target.updateattribs(incfile) def diff(self, incfile): """Process diff increment""" if not self.target.lstat(): self.copy_from_mirror() BFO.patch(self.target, incfile) self.target.updateattribs(incfile) class Restorer: """Wrapper around Restore and related functions self.TR.time should be the target time files are restored to. self.exclude is a list of directories that will be excluded from the restore, and exclude_regexps is a compiled version. """ def __init__(self, TR, exclude): """Restorer initializer""" self.TR = TR self.exclude = exclude self.exclude_regexps = map(re.compile, exclude) def Restore(self, pt): """Restore files indicated in pathtriple pt.incbase should not include the extension of the increment file. """ L.Log("Restoring %s to %s" % (pt.oldfull, pt.newfull), 7) L.Log("pathtriple = %s" % pt, 8) if self.isexcluded(pt.oldfull): L.Log("Not restoring file %s due to exclude." % pt.newfull, 5) return RS = RestoreSequence(pt.oldfull, pt.newfull) for entry in pt.incdir.listdir(): if self.is_incfile_relevant(pt.incbase, entry): RS.append(pt.incdir.append(entry)) RS.restore() if pt.newfull.isdir(): self.restore_dir_contents(pt) def restore_dir_contents(self, pt): """Recursively restore contents of directory pt""" if pt.incfull.isdir(): inc_dirlisting = map(lambda p: p.inc_basename(), filter(lambda p: p.isincfile(), pt.incfull.listdir())) else: inc_dirlisting = [] if pt.oldfull.isdir(): old_dirlisting = pt.oldfull.listdir() else: old_dirlisting = [] for entry in listunion(inc_dirlisting, old_dirlisting): self.Restore(pt.append_base(entry)) def is_incfile_relevant(self, incbase, path): """Return true if path is relevant incfile to restore to incbase Only checks the base and time as indicated in path. Path does not have to be an incfile; if not return false. """ return (path.isincfile() and path.inc_basename() == incbase and path.inc_time() >= self.TR.time) def isexcluded(self, filename): """Return true if filename matches exclude list""" for regexp in self.exclude_regexps: m = regexp.match(str(filename)) if m and m.group() == filename: return 1 return None class Main: """Used to hold variables for the initialization functions self.exclude and self.exclude_mirror are passed to BackupPrepare or RestorePrepare eventually. self.rbdir is the normalized path of the rdiff-backup-data directory (used for writing log files and for finding mirror files when restoring). self.args are the arguments (not options) passed on the commandline. self.action will eventually be "backup" or "restore" """ def __init__(self): self.exclude = [] self.exclude_mirror = [] self.rbdir = None self.args = None self.action = None def Main(self): self.parse_commandline(sys.argv[1:]) self.check_for_rdiff() self.set_action_from_args() os.umask(077) if self.action == "backup": if len(self.args) != 2: self.commandline_error("Backups require " "two command line arguments") BackupPrepare(self.args, self.exclude, self.exclude_mirror).Start() else: assert self.action == "restore" RestorePrepare(self.args, self.exclude).Start() def parse_commandline(self, commandline): """Use commandline to set variables or signal error""" verbosity = 4 try: optlist, self.args = getopt.getopt(sys.argv[1:], "bv:", ["exclude=", "exclude-mirror="]) except getopt.error: self.commandline_error("Invalid commandline options") if len(self.args) == 1: action = "restore" elif len(self.args) > 2 or len(self.args) < 1: self.commandline_error("Wrong number of commandline arguments") for opt, arg in optlist: if opt == "-b": action = "backup" elif opt == "-v": verbosity = int(arg) elif opt == "--exclude": self.exclude.append(arg) elif opt == "--exclude-mirror": self.exclude_mirror.append(arg) L.verbosity = verbosity def commandline_error(self, errstring): """Signal error with commandline and exit""" sys.stderr.write("ERROR: %s\n" % errstring) print self.usage() sys.exit(2) def usage(self): """Return string summarizing usage""" return """ This script can be used to backup files incrementally, storing them as complete copies of the original plus reverse diffs describing how to recover older versions of the files. Usage: rdiff-backup [options] source backupdir rdiff-backup [options] file-to-restore [destination] Options: -b Force backup mode even if first argument appears to be a backup file. -h Display this message and exit. -v[0-9] Specify verbosity level (0 is totally silent, 9 is noisiest). This affects logging as well as output to the terminal. --exclude regexp This option can be used multiple times to exclude files from being backed up. Each argument should be a regular expression. Examples: rdiff-backup --exclude /home/bob/tmp /home/bob /mnt/backup Back files up from /home/bob to /mnt/backup, leaving increments in /mnt/backup/rdiff-backup-data. Do not back up directory /home/bob/tmp. rdiff-backup -v9 important-data.dir-3423.2001-02-14_23:23:23 temp Restore directory important-data as it was on Februrary 14, 2001, calling the new directory "temp". Keep lots of logging information. """ def check_for_rdiff(self): """Exit with error if problem with rdiff""" if os.system("rdiff --help >/dev/null"): L.FatalError(""" There was some problem running rdiff, a program this script requires. Make sure rdiff is executable and in the PATH. rdiff is currently part of the librsync package and can be downloaded from http://sourceforge.net/projects/rproxy/""") def set_action_from_args(self): """Try to determine action from form of arguments""" if not self.action: if Path(self.args[0]).isincfile(): self.action = "restore" else: self.action = "backup" L.Log("Action = " + self.action, 8) def listunion(a, b): """Make a new list composed of any element in either a or b""" d = {} for element in a + b: d[element]=1 return d.keys() def mapor(f, l): """Return true if f(e) true for e in l. Not short-circuiting.""" result = None for i in l: result = f(i) or result return result if __name__ == "__main__": Main().Main()