vpx/tools/intersect-diffs.py

#!/usr/bin/env python
##  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
##
##  Use of this source code is governed by a BSD-style license
##  that can be found in the LICENSE file in the root of the source
##  tree. An additional intellectual property rights grant can be found
##  in the file PATENTS.  All contributing project authors may
##  be found in the AUTHORS file in the root of the source tree.
##
"""Calculates the "intersection" of two unified diffs.

Given two diffs, A and B, it finds all hunks in B that had non-context lines
in A and prints them to stdout. This is useful to determine the hunks in B that
are relevant to A. The resulting file can be applied with patch(1) on top of A.
"""

__author__ = "jkoleszar@google.com"

import re
import sys


class DiffLines(object):
    """A container for one half of a diff."""

    def __init__(self, filename, offset, length):
        self.filename = filename
        self.offset = offset
        self.length = length
        self.lines = []
        self.delta_line_nums = []

    def Append(self, line):
        l = len(self.lines)
        if line[0] != " ":
            self.delta_line_nums.append(self.offset + l)
        self.lines.append(line[1:])
        assert l+1 <= self.length

    def Complete(self):
        return len(self.lines) == self.length

    def __contains__(self, item):
        return item >= self.offset and item <= self.offset + self.length - 1


class DiffHunk(object):
    """A container for one diff hunk, consisting of two DiffLines."""

    def __init__(self, header, file_a, file_b, start_a, len_a, start_b, len_b):
        self.header = header
        self.left = DiffLines(file_a, start_a, len_a)
        self.right = DiffLines(file_b, start_b, len_b)
        self.lines = []

    def Append(self, line):
        """Adds a line to the DiffHunk and its DiffLines children."""
        if line[0] == "-":
            self.left.Append(line)
        elif line[0] == "+":
            self.right.Append(line)
        elif line[0] == " ":
            self.left.Append(line)
            self.right.Append(line)
        else:
            assert False, ("Unrecognized character at start of diff line "
                           "%r" % line[0])
        self.lines.append(line)

    def Complete(self):
        return self.left.Complete() and self.right.Complete()

    def __repr__(self):
        return "DiffHunk(%s, %s, len %d)" % (
            self.left.filename, self.right.filename,
            max(self.left.length, self.right.length))


def ParseDiffHunks(stream):
    """Walk a file-like object, yielding DiffHunks as they're parsed."""

    file_regex = re.compile(r"(\+\+\+|---) (\S+)")
    range_regex = re.compile(r"@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")
    hunk = None
    while True:
        line = stream.readline()
        if not line:
            break

        if hunk is None:
            # Parse file names
            diff_file = file_regex.match(line)
            if diff_file:
              if line.startswith("---"):
                  a_line = line
                  a = diff_file.group(2)
                  continue
              if line.startswith("+++"):
                  b_line = line
                  b = diff_file.group(2)
                  continue

            # Parse offset/lengths
            diffrange = range_regex.match(line)
            if diffrange:
                if diffrange.group(2):
                    start_a = int(diffrange.group(1))
                    len_a = int(diffrange.group(3))
                else:
                    start_a = 1
                    len_a = int(diffrange.group(1))

                if diffrange.group(5):
                    start_b = int(diffrange.group(4))
                    len_b = int(diffrange.group(6))
                else:
                    start_b = 1
                    len_b = int(diffrange.group(4))

                header = [a_line, b_line, line]
                hunk = DiffHunk(header, a, b, start_a, len_a, start_b, len_b)
        else:
            # Add the current line to the hunk
            hunk.Append(line)

            # See if the whole hunk has been parsed. If so, yield it and prepare
            # for the next hunk.
            if hunk.Complete():
                yield hunk
                hunk = None

    # Partial hunks are a parse error
    assert hunk is None


def FormatDiffHunks(hunks):
    """Re-serialize a list of DiffHunks."""
    r = []
    last_header = None
    for hunk in hunks:
        this_header = hunk.header[0:2]
        if last_header != this_header:
            r.extend(hunk.header)
            last_header = this_header
        else:
            r.extend(hunk.header[2])
        r.extend(hunk.lines)
        r.append("\n")
    return "".join(r)


def ZipHunks(rhs_hunks, lhs_hunks):
    """Join two hunk lists on filename."""
    for rhs_hunk in rhs_hunks:
        rhs_file = rhs_hunk.right.filename.split("/")[1:]

        for lhs_hunk in lhs_hunks:
            lhs_file = lhs_hunk.left.filename.split("/")[1:]
            if lhs_file != rhs_file:
                continue
            yield (rhs_hunk, lhs_hunk)


def main():
    old_hunks = [x for x in ParseDiffHunks(open(sys.argv[1], "r"))]
    new_hunks = [x for x in ParseDiffHunks(open(sys.argv[2], "r"))]
    out_hunks = []

    # Join the right hand side of the older diff with the left hand side of the
    # newer diff.
    for old_hunk, new_hunk in ZipHunks(old_hunks, new_hunks):
        if new_hunk in out_hunks:
            continue
        old_lines = old_hunk.right
        new_lines = new_hunk.left

        # Determine if this hunk overlaps any non-context line from the other
        for i in old_lines.delta_line_nums:
            if i in new_lines:
                out_hunks.append(new_hunk)
                break

    if out_hunks:
        print FormatDiffHunks(out_hunks)
    else:
        sys.exit(1)

if __name__ == "__main__":
    main()