diff options
author | Florian Weimer <fw@deneb.enyo.de> | 2005-09-12 20:08:46 +0000 |
---|---|---|
committer | Florian Weimer <fw@deneb.enyo.de> | 2005-09-12 20:08:46 +0000 |
commit | 0e41376145c3de5e2f1c51917bba999b508137fd (patch) | |
tree | eb7c9fc0b2d44b9ff92d9b34b4793d9283c20af1 | |
parent | 7c81030dd20aaa2eeb9f35d085e8cca8284d0e79 (diff) |
lib/python/debian_support.py:
Add support for downloading package file diffs.
bin/apt-update-file:
Driver script for the new functionality.
(I will use this functionality to implement package database
replication. The goal is to keep a local copy of all the interesting
data, so that we no longer need to consult madison etc.)
git-svn-id: svn+ssh://svn.debian.org/svn/secure-testing@1939 e39458fd-73e7-0310-bf30-c45bca0a0e42
-rwxr-xr-x | bin/apt-update-file | 30 |
-rw-r--r-- | lib/python/debian_support.py | 204 |
2 files changed, 233 insertions, 1 deletion
diff --git a/bin/apt-update-file b/bin/apt-update-file new file mode 100755 index 0000000000..f424195b34 --- /dev/null +++ b/bin/apt-update-file @@ -0,0 +1,30 @@ +#!/usr/bin/python + +# This script is mainly used to demo the updateFile function. + +import os +import os.path +import string +import sys + +def setup_paths(): + check_file = 'lib/python/debian_support.py' + path = os.getcwd() + while 1: + if os.path.exists("%s/%s" % (path, check_file)): + sys.path = [path + '/lib/python'] + sys.path + return path + idx = string.rfind(path, '/') + if idx == -1: + raise ImportError, "could not setup paths" + path = path[0:idx] +root_path = setup_paths() + +import bugs +import debian_support + +if len(sys.argv) <> 3: + sys.stderr.write("usage: apt-update-file REMOTE LOCAL\n") + sys.exit(1) + +debian_support.updateFile(sys.argv[1], sys.argv[2], verbose=True) diff --git a/lib/python/debian_support.py b/lib/python/debian_support.py index 1cc85e9781..01515fdf0e 100644 --- a/lib/python/debian_support.py +++ b/lib/python/debian_support.py @@ -17,7 +17,9 @@ """This module implements facilities to deal with Debian-specific metadata.""" +import os import re +import sha import types class ParseError(Exception): @@ -103,7 +105,7 @@ class PackageFile: Objects of this class can be used to read Debian's Source and Packages files.""" - re_field = re.compile(r'^([A-Za-z][A-Za-z0-9-]+):\s+(.*?)\s*$') + re_field = re.compile(r'^([A-Za-z][A-Za-z0-9-]+):(?:\s+(.*?))?\s*$') re_continuation = re.compile(r'^\s+(?:\.|(\S.*?)\s*)$') def __init__(self, name, fileObj=None): @@ -137,6 +139,7 @@ class PackageFile: if not match: self.raiseSyntaxError("expected package field") (name, contents) = match.groups() + contents = contents or '' while True: line = self.file.readline() @@ -150,6 +153,8 @@ class PackageFile: else: break pkg.append((name, contents)) + if pkg: + yield pkg def raiseSyntaxError(self, msg, lineno=None): if lineno is None: @@ -186,6 +191,188 @@ def internRelease(name, 
releases=listReleases()): return None del listReleases +def readLinesSHA1(lines): + m = sha.new() + for l in lines: + m.update(l) + return m.hexdigest() + +def patchesFromEdScript(source, + re_cmd=re.compile(r'^(\d+)(?:,(\d+))?([acd])$')): + """Converts source to a stream of patches. + + Patches are triples of line indexes: + + - first line to be replaced + - one past the last line being replaces + - list of line replacements + + This is enough to model arbitrary additions, deletions and + replacements. + """ + + i = iter(source) + + for line in i: + match = re_cmd.match(line) + if match is None: + raise ValueError, "invalid patch command: " + `line` + + (first, last, cmd) = match.groups() + first = int(first) + if last is not None: + last = int(last) + + if cmd == 'd': + first = first - 1 + if last is None: + last = first + 1 + yield (first, last, []) + continue + + if cmd == 'a': + if last is not None: + raise ValueError, "invalid patch argument: " + `line` + last = first + else: # cmd == c + first = first - 1 + if last is None: + last = first + 1 + + lines = [] + for l in i: + if l == '': + raise ValueError, "end of stream in command: " + `line` + if l == '.\n' or l == '.': + break + lines.append(l) + yield (first, last, lines) + +def patchLines(lines, patches): + """Applies patches to lines. Updates lines in place.""" + for (first, last, args) in patches: + lines[first:last] = args + +def replaceFile(lines, local): + new_file = file(local + '.new', 'w+') + for l in lines: + new_file.write(l) + new_file.close() + os.rename(local + '.new', local) + +def downloadGunzipLines(remote): + """Downloads a file from a remote location and gunzips it. + + Returns the lines in the file.""" + + # The implementation is rather crude, but it seems that the gzip + # module needs a real file for input. 
+ + import gzip + import tempfile + import urllib + + (handle, fname) = tempfile.mkstemp() + try: + os.close(handle) + (filename, headers) = urllib.urlretrieve(remote, fname) + gfile = gzip.GzipFile(filename) + lines = gfile.readlines() + gfile.close() + finally: + os.unlink(fname) + return lines + +def downloadFile(remote, local): + """Copies a gzipped remote file to the local system. + + remote - URL, without the .gz suffix + local - name of the local file + """ + + lines = downloadGunzipLines(remote + '.gz') + replaceFile(lines, local) + return lines + +def updateFile(remote, local, verbose=None): + """Updates the local file by downloading a remote patch. + + Returns a list of lines in the local file. + """ + + try: + local_file = file(local) + except OSError: + return downloadFile(remote, local) + + lines = local_file.readlines() + local_file.close() + local_hash = readLinesSHA1(lines) + patches_to_apply = [] + patch_hashes = {} + + import urllib + index_name = remote + '.diff/Index' + + re_whitespace=re.compile('\s+') + + for fields in PackageFile(index_name, urllib.urlopen(index_name)): + for (field, value) in fields: + if field == 'SHA1-Current': + (remote_hash, remote_size) = value.split(' ') + if local_hash == remote_hash: + if verbose: + print "updateFile: local file is up-to-date" + return lines + continue + + if field =='SHA1-History': + for entry in value.splitlines(): + if entry == '': + continue + (hist_hash, hist_size, patch_name) \ + = re_whitespace.split(entry) + + # After the first patch, we have to apply all + # remaining patches. 
+ if patches_to_apply or hist_hash == local_hash: + patches_to_apply.append(patch_name) + + continue + + if field == 'SHA1-Patches': + for entry in value.splitlines(): + if entry == '': + continue + (patch_hash, patch_size, patch_name) \ + = re_whitespace.split(entry) + patch_hashes[patch_name] = patch_hash + continue + + if verbose: + print "updateFile: field %s ignored" % `field` + + if not patches_to_apply: + if verbose: + print "updateFile: could not find historic entry", local_hash + return downloadFile(remote, local) + + for patch_name in patches_to_apply: + print "updateFile: downloading patch " + `patch_name` + patch_contents = downloadGunzipLines(remote + '.diff/' + patch_name + + '.gz') + if readLinesSHA1(patch_contents ) <> patch_hashes[patch_name]: + raise ValueError, "patch %s was garbled" % `patch_name` + patchLines(lines, patchesFromEdScript(patch_contents)) + + new_hash = readLinesSHA1(lines) + if new_hash <> remote_hash: + raise ValueError, ("patch failed, got %s instead of %s" + % (new_hash, remote_hash)) + + replaceFile(lines, local) + return lines + def test(): # Version assert Version('0') < Version('a') @@ -210,5 +397,20 @@ def test(): # for p in PackageFile('../../data/packages/sarge/Packages.i386'): # assert p[0][0] == 'Package' + # Helper routines + assert readLinesSHA1([]) == 'da39a3ee5e6b4b0d3255bfef95601890afd80709' + assert readLinesSHA1(['1\n', '23\n']) \ + == '14293c9bd646a15dc656eaf8fba95124020dfada' + + file_a = map(lambda x: "%d\n" % x, range(1, 18)) + file_b = ['0\n', '1\n', '<2>\n', '<3>\n', '4\n', '5\n', '7\n', '8\n', + '11\n', '12\n', '<13>\n', '14\n', '15\n', 'A\n', 'B\n', 'C\n', + '16\n', '17\n',] + patch = ['15a\n', 'A\n', 'B\n', 'C\n', '.\n', '13c\n', '<13>\n', '.\n', + '9,10d\n', '6d\n', '2,3c\n', '<2>\n', '<3>\n', '.\n', '0a\n', + '0\n', '.\n'] + patchLines(file_a, patchesFromEdScript(patch)) + assert ''.join(file_b) == ''.join(file_a) + if __name__ == "__main__": test() |