2007-01-25 00:46:46 +08:00
|
|
|
import py, os, stat, md5
|
|
|
|
from Queue import Queue
|
2007-01-24 22:24:01 +08:00
|
|
|
|
|
|
|
|
2007-01-25 00:46:46 +08:00
|
|
|
class RSync(object):
    """ This class allows to synchronise files and directories
    with one or multiple remote filesystems.

    An RSync instance allows to dynamically add remote targets
    and then synchronizes the remote filesystems with
    any provided source directory.

    There is limited support for symlinks, which means that symlinks
    pointing to the sourcetree will be sent "as is" while external
    symlinks will be just copied (regardless of existence of such
    a path on remote side).
    """
    def __init__(self, callback=None, **options):
        """ Create a synchroniser without any targets.

        callback -- optional callable used for progress reporting; it is
                    invoked as callback("list", total_size, channel) and
                    callback("ack", size, channel).
        options  -- keyword options handed to every remote side; only
                    'delete' is accepted.
        """
        for name in options:
            # NOTE: the trailing comma matters.  ('delete') is just the
            # string 'delete' and "in" would then be a substring test.
            assert name in ('delete',)
        self._options = options
        assert callback is None or callable(callback)
        self._callback = callback
        # maps channel -> finishedcallback (or None) for active targets
        self._channels = {}
        # (channel, request) pairs queued by the channel callbacks
        self._receivequeue = Queue()
        # ("link", basename, linkpoint) tuples collected while walking
        self._links = []

    def filter(self, path):
        """ Return a true value if 'path' should be synchronised.

        Consulted by _send_directory() for every directory entry; this
        default implementation accepts everything.
        """
        return True

    def _end_of_channel(self, channel):
        """ Handle the end marker received for 'channel'.

        Raises IOError if the channel is still registered as a target,
        i.e. it ended before reporting "done".
        """
        if channel in self._channels:
            # too early!  we must have got an error
            channel.waitclose()
            # or else we raise one
            raise IOError('connection unexpectedly closed: %s ' % (
                channel.gateway,))

    def _process_link(self, channel):
        """ Send all collected symlink entries to 'channel'.
        """
        for link in self._links:
            channel.send(link)
        # completion marker, this host is done
        channel.send(42)

    def _done(self, channel):
        """ Unregister 'channel' and call its finishedcallback, if any.
        """
        finishedcallback = self._channels.pop(channel)
        if finishedcallback:
            finishedcallback()

    def _list_done(self, channel):
        """ Report the total size requested by 'channel' to the callback.
        """
        if self._callback:
            # sum up all to send; a channel which did not request any
            # file has no entry in _to_send and counts as zero bytes
            requested = self._to_send.get(channel, ())
            s = sum([self._paths[i] for i in requested])
            self._callback("list", s, channel)

    def _send_item(self, channel, data):
        """ Send one item

        'data' is a (relative_path_components, checksum) pair.  The file
        content is sent over 'channel'; None is sent instead if the file
        cannot be read or if its md5 digest equals 'checksum'.
        """
        modified_rel_path, checksum = data
        modifiedpath = os.path.join(self._sourcedir, *modified_rel_path)
        try:
            f = open(modifiedpath, 'rb')
            try:
                data = f.read()
            finally:
                # close right away, also if reading fails
                f.close()
        except IOError:
            data = None

        # provide info to progress callback function
        modified_rel_path = "/".join(modified_rel_path)
        if data is None:
            self._paths[modified_rel_path] = 0
        else:
            self._paths[modified_rel_path] = len(data)
        self._to_send.setdefault(channel, []).append(modified_rel_path)

        if data is not None:
            if checksum is not None and checksum == md5.md5(data).digest():
                data = None     # not really modified
            else:
                # ! there is a reason for the interning:
                # sharing multiple copies of the file's data
                data = intern(data)
                print('%s <= %s' % (
                    channel.gateway.remoteaddress,
                    modified_rel_path))
        channel.send(data)

    def send(self, sourcedir):
        """ Sends a sourcedir to all added targets.

        Raises IOError if no targets are registered or if a target's
        channel ends before it reported "done", and ValueError if a
        target sends an unknown command.
        """
        if not self._channels:
            raise IOError("no targets available, maybing you "
                          "are trying call send() twice?")
        self._sourcedir = str(sourcedir)
        # normalize a trailing '/' away
        self._sourcedir = os.path.dirname(os.path.join(self._sourcedir, 'x'))
        # send directory structure and file timestamps/sizes
        self._send_directory_structure(self._sourcedir)

        # paths and to_send are only used for doing
        # progress-related callbacks
        self._paths = {}
        self._to_send = {}

        # send modified file to clients; _done() removes a channel from
        # self._channels, which eventually terminates this loop
        while self._channels:
            channel, req = self._receivequeue.get()
            if req is None:
                self._end_of_channel(channel)
                continue
            command, data = req
            if command == "links":
                self._process_link(channel)
            elif command == "done":
                self._done(channel)
            elif command == "ack":
                if self._callback:
                    self._callback("ack", self._paths[data], channel)
            elif command == "list_done":
                self._list_done(channel)
            elif command == "send":
                self._send_item(channel, data)
                # do not keep the request payload alive while blocking
                # on the queue
                del data
            else:
                # asserting a non-empty string always passes, so fail
                # loudly instead of silently ignoring the request
                raise ValueError("Unknown command %s" % (command,))

    def add_target(self, gateway, destdir, finishedcallback=None):
        """ Adds a remote target specified via a 'gateway'
        and a remote destination directory.

        'finishedcallback', if given, is called without arguments once
        the target has reported "done".
        """
        assert finishedcallback is None or callable(finishedcallback)

        def itemcallback(req):
            # 'channel' is bound below, before any item can arrive
            self._receivequeue.put((channel, req))

        channel = gateway.remote_exec(REMOTE_SOURCE)
        channel.setcallback(itemcallback, endmarker=None)
        channel.send((str(destdir), self._options))
        self._channels[channel] = finishedcallback

    def _broadcast(self, msg):
        """ Send 'msg' to every registered channel.
        """
        for channel in self._channels:
            channel.send(msg)

    def _send_link(self, basename, linkpoint):
        """ Remember a symlink; it is sent later by _process_link().
        """
        self._links.append(("link", basename, linkpoint))

    def _send_directory(self, path):
        """ Broadcast the filtered entry names of directory 'path' and
        then descend into each accepted entry.
        """
        # dir: send a list of entries
        names = []
        subpaths = []
        for name in os.listdir(path):
            p = os.path.join(path, name)
            if self.filter(p):
                names.append(name)
                subpaths.append(p)
        self._broadcast(names)
        for p in subpaths:
            self._send_directory_structure(p)

    def _send_link_structure(self, path):
        """ Record the symlink at 'path' and broadcast None for it.

        Absolute links pointing into the source directory are recorded
        relative to it; all other links are recorded unchanged.
        """
        linkpoint = os.readlink(path)
        basename = path[len(self._sourcedir) + 1:]
        root = self._sourcedir
        # relative links are sent as they are
        # XXX: do sth with ../ links
        if linkpoint.startswith(os.sep):
            # only strip the source directory on a real path boundary:
            # '/a/bc' must not be taken as living below '/a/b'
            if linkpoint == root or linkpoint.startswith(root + os.sep):
                linkpoint = linkpoint[len(root) + 1:]
        self._send_link(basename, linkpoint)
        self._broadcast(None)

    def _send_directory_structure(self, path):
        """ Broadcast the structure entry for 'path'.

        Regular files yield a (mtime, size) pair, directories their
        entry list (recursively), symlinks None; a path which cannot be
        stat'ed yields (0, 0).  Raises ValueError for any other kind
        of file.
        """
        try:
            st = os.lstat(path)
        except OSError:
            self._broadcast((0, 0))
            return
        mode = st.st_mode
        if stat.S_ISREG(mode):
            # regular file: send a timestamp/size pair
            self._broadcast((st.st_mtime, st.st_size))
        elif stat.S_ISDIR(mode):
            self._send_directory(path)
        elif stat.S_ISLNK(mode):
            self._send_link_structure(path)
        else:
            raise ValueError("cannot sync %r" % (path,))
|
|
|
|
|
2007-01-25 00:46:46 +08:00
|
|
|
# Source text handed to gateway.remote_exec(): the content of the
# 'rsync_remote.py' file located next to this module, followed by a
# call to f().
REMOTE_SOURCE = (
    py.path.local(__file__).dirpath().join('rsync_remote.py').open().read()
    + "\nf()"
)
|
2007-01-24 22:24:01 +08:00
|
|
|
|