Commit 41fa8c35 authored by jkobus, committed by Jarek Kobus
Browse files

Add cleanupSemantics method, produce nicer diff output



Change-Id: I877e06c930c219ad23af4b7c6078d8c8db3706e9
Reviewed-by: hjk <hjk121@nokiamail.com>
parent e7cb32d6
......@@ -254,8 +254,8 @@ void DiffEditorWidget::setDiff(const QString &leftText, const QString &rightText
{
// QTime time;
// time.start();
Differ diffGenerator;
QList<Diff> list = diffGenerator.diff(leftText, rightText);
Differ differ;
QList<Diff> list = differ.cleanupSemantics(differ.diff(leftText, rightText));
// int ela = time.elapsed();
// qDebug() << "Time spend in diff:" << ela;
setDiff(list);
......
......@@ -524,11 +524,153 @@ QList<Diff> Differ::squashEqualities(const QList<Diff> &diffList)
return squashedDiffList;
}
// Bookkeeping record for a single Diff::Equal entry, used by
// Differ::cleanupSemantics() to decide whether that equality is small enough,
// compared to the edits around it, to be dissolved into a delete + insert.
// All counts are text lengths (number of characters), not numbers of diffs.
struct EqualityData
{
// Position of the equality inside the diff list given to cleanupSemantics().
int equalityIndex;
// Length of the equality's text.
int textCount;
// Total length of Delete text between the previous recorded equality
// (or the start of the list) and this equality.
int deletesBefore;
// Total length of Insert text between the previous recorded equality
// (or the start of the list) and this equality.
int insertsBefore;
// Total length of Delete text between this equality and the next one
// (or the end of the list). Filled in once the next equality is reached.
int deletesAfter;
// Total length of Insert text between this equality and the next one
// (or the end of the list). Filled in once the next equality is reached.
int insertsAfter;
};
// Removes equalities that are no longer than the edits on both of their sides.
// Such an equality is replaced by a Delete and an Insert carrying the same
// text; the resulting list is then passed through merge() and
// cleanupOverlaps() before being returned.
QList<Diff> Differ::cleanupSemantics(const QList<Diff> &diffList)
{
    const int diffCount = diffList.count();

    // Pass 1: record every equality together with the amount of deleted and
    // inserted text found directly before and directly after it.
    QList<EqualityData> equalities;
    int pendingDeletes = 0;
    int pendingInserts = 0;
    for (int index = 0; index < diffCount; ++index) {
        const Diff &entry = diffList.at(index);
        if (entry.command == Diff::Delete) {
            pendingDeletes += entry.text.count();
            continue;
        }
        if (entry.command == Diff::Insert) {
            pendingInserts += entry.text.count();
            continue;
        }
        if (entry.command != Diff::Equal)
            continue;

        // The edits collected so far sit "after" the previous equality ...
        if (!equalities.isEmpty()) {
            EqualityData &previous = equalities.last();
            previous.deletesAfter = pendingDeletes;
            previous.insertsAfter = pendingInserts;
        }
        // ... and "before" the one we have just reached.
        EqualityData record;
        record.equalityIndex = index;
        record.textCount = entry.text.count();
        record.deletesBefore = pendingDeletes;
        record.insertsBefore = pendingInserts;
        record.deletesAfter = 0; // overwritten at the next equality or at the end
        record.insertsAfter = 0;
        equalities.append(record);
        pendingDeletes = 0;
        pendingInserts = 0;
    }
    // The list does not have to end with an equality: hand the trailing edits
    // to the last recorded equality.
    if (!equalities.isEmpty()) {
        EqualityData &lastRecord = equalities.last();
        lastRecord.deletesAfter = pendingDeletes;
        lastRecord.insertsAfter = pendingInserts;
    }

    // Pass 2: pick the equalities to dissolve. The map is used as a set of
    // indexes into diffList.
    QMap<int, bool> equalitiesToBeSplit;
    int cursor = 0;
    while (cursor < equalities.count()) {
        const EqualityData current = equalities.at(cursor);
        const int editsBefore = qMax(current.deletesBefore, current.insertsBefore);
        const int editsAfter = qMax(current.deletesAfter, current.insertsAfter);
        if (current.textCount > editsBefore || current.textCount > editsAfter) {
            ++cursor; // large enough on at least one side, keep it
            continue;
        }

        // The dissolved text (and the edits beyond it) now counts as edits
        // adjacent to both neighbouring equalities.
        if (cursor > 0) {
            EqualityData &previous = equalities[cursor - 1];
            previous.deletesAfter += current.textCount + current.deletesAfter;
            previous.insertsAfter += current.textCount + current.insertsAfter;
        }
        if (cursor + 1 < equalities.count()) {
            EqualityData &next = equalities[cursor + 1];
            next.deletesBefore += current.textCount + current.deletesBefore;
            next.insertsBefore += current.textCount + current.insertsBefore;
        }
        equalitiesToBeSplit.insert(current.equalityIndex, true);
        equalities.removeAt(cursor);
        if (cursor > 0)
            --cursor; // the previous equality may have become removable too
    }

    // Pass 3: rebuild the list, replacing every selected equality with a
    // Delete followed by an Insert of the same text.
    QList<Diff> rebuilt;
    for (int index = 0; index < diffCount; ++index) {
        const Diff &entry = diffList.at(index);
        if (!equalitiesToBeSplit.contains(index)) {
            rebuilt.append(entry);
            continue;
        }
        rebuilt.append(Diff(Diff::Delete, entry.text));
        rebuilt.append(Diff(Diff::Insert, entry.text));
    }

    const QList<Diff> merged = merge(rebuilt);
    return cleanupOverlaps(merged);
}
// Finds overlaps between deletions and insertions.
// The "diffList" is expected to contain at most one deletion and one
// insertion between two equalities, in this order.
// An overlap is turned into an equality when it is longer than half of the
// deleted or half of the inserted text, e.g.:
// DEL(ABCXXXX), INS(XXXXDEF) -> DEL(ABC), EQ(XXXX), INS(DEF)
// DEL(XXXXABC), INS(DEFXXXX) -> INS(DEF), EQ(XXXX), DEL(ABC)
QList<Diff> Differ::cleanupOverlaps(const QList<Diff> &diffList)
{
    QList<Diff> result;
    const int total = diffList.count();
    int pos = 0;
    while (pos < total) {
        Diff deletion = diffList.at(pos);
        const bool isDeleteInsertPair = deletion.command == Diff::Delete
                && pos + 1 < total
                && diffList.at(pos + 1).command == Diff::Insert;
        if (!isDeleteInsertPair) {
            // Anything that is not a Delete directly followed by an Insert
            // is copied through untouched.
            result.append(deletion);
            ++pos;
            continue;
        }

        Diff insertion = diffList.at(pos + 1);
        pos += 2;

        // End of the deleted text matching the start of the inserted text ...
        const int forwardOverlap = commonOverlap(deletion.text, insertion.text);
        // ... and end of the inserted text matching the start of the deleted text.
        const int reverseOverlap = commonOverlap(insertion.text, deletion.text);
        const bool forward = forwardOverlap >= reverseOverlap;
        const int overlap = forward ? forwardOverlap : reverseOverlap;

        const bool worthSplitting = overlap > deletion.text.count() / 2
                || overlap > insertion.text.count() / 2;
        if (!worthSplitting) {
            result.append(deletion);
            result.append(insertion);
            continue;
        }

        if (forward) {
            const Diff equality(Diff::Equal, insertion.text.left(overlap));
            deletion.text = deletion.text.left(deletion.text.count() - overlap);
            insertion.text = insertion.text.mid(overlap);
            result.append(deletion);
            result.append(equality);
            result.append(insertion);
        } else {
            const Diff equality(Diff::Equal, deletion.text.left(overlap));
            insertion.text = insertion.text.left(insertion.text.count() - overlap);
            deletion.text = deletion.text.mid(overlap);
            result.append(insertion);
            result.append(equality);
            result.append(deletion);
        }
    }
    return result;
}
int Differ::commonPrefix(const QString &text1, const QString &text2) const
{
int i = 0;
const int minCount = qMin(text1.count(), text2.count());
while (i < minCount) {
const int text1Count = text1.count();
const int text2Count = text2.count();
const int maxCount = qMin(text1Count, text2Count);
while (i < maxCount) {
if (text1.at(i) != text2.at(i))
break;
i++;
......@@ -541,8 +683,8 @@ int Differ::commonSuffix(const QString &text1, const QString &text2) const
int i = 0;
const int text1Count = text1.count();
const int text2Count = text2.count();
const int minCount = qMin(text1.count(), text2.count());
while (i < minCount) {
const int maxCount = qMin(text1Count, text2Count);
while (i < maxCount) {
if (text1.at(text1Count - i - 1) != text2.at(text2Count - i - 1))
break;
i++;
......@@ -550,4 +692,18 @@ int Differ::commonSuffix(const QString &text1, const QString &text2) const
return i;
}
// Returns the length of the longest suffix of "text1" that is also a prefix
// of "text2", or 0 when there is no such overlap.
int Differ::commonOverlap(const QString &text1, const QString &text2) const
{
    const int length1 = text1.count();
    const int length2 = text2.count();
    // Try the longest possible overlap first and shrink it one character at
    // a time, so the first match found is the longest one.
    for (int overlap = qMin(length1, length2); overlap > 0; --overlap) {
        if (text1.midRef(length1 - overlap) == text2.leftRef(overlap))
            return overlap;
    }
    return 0;
}
} // namespace DiffEditor
......@@ -72,6 +72,8 @@ public:
void setDiffMode(DiffMode mode);
bool diffMode() const;
QList<Diff> merge(const QList<Diff> &diffList);
QList<Diff> cleanupSemantics(const QList<Diff> &diffList);
private:
QList<Diff> preprocess1AndDiff(const QString &text1, const QString &text2);
QList<Diff> preprocess2AndDiff(const QString &text1, const QString &text2);
......@@ -93,6 +95,8 @@ private:
int subTextStart);
int commonPrefix(const QString &text1, const QString &text2) const;
int commonSuffix(const QString &text1, const QString &text2) const;
int commonOverlap(const QString &text1, const QString &text2) const;
QList<Diff> cleanupOverlaps(const QList<Diff> &diffList);
DiffMode m_diffMode;
DiffMode m_currentDiffMode;
};
......
......@@ -77,6 +77,8 @@ private Q_SLOTS:
void myers();
void merge_data();
void merge();
void cleanupSemantics_data();
void cleanupSemantics();
};
......@@ -419,6 +421,183 @@ void tst_Differ::merge()
QCOMPARE(result, expected);
}
void tst_Differ::cleanupSemantics_data()
{
QTest::addColumn<QList<Diff> >("input");
QTest::addColumn<QList<Diff> >("expected");
QTest::newRow("Empty")
<< QList<Diff>()
<< QList<Diff>();
QTest::newRow("Don't cleanup 1")
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("AB"))
<< Diff(Diff::Insert, QString("CD"))
<< Diff(Diff::Equal, QString("EF"))
<< Diff(Diff::Delete, QString("G")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("AB"))
<< Diff(Diff::Insert, QString("CD"))
<< Diff(Diff::Equal, QString("EF"))
<< Diff(Diff::Delete, QString("G")));
QTest::newRow("Don't cleanup 2")
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABC"))
<< Diff(Diff::Insert, QString("DEF"))
<< Diff(Diff::Equal, QString("GHIJ"))
<< Diff(Diff::Delete, QString("KLMN")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABC"))
<< Diff(Diff::Insert, QString("DEF"))
<< Diff(Diff::Equal, QString("GHIJ"))
<< Diff(Diff::Delete, QString("KLMN")));
QTest::newRow("Don't cleanup 3")
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABC"))
<< Diff(Diff::Insert, QString("DEF"))
<< Diff(Diff::Equal, QString("GHIJ"))
<< Diff(Diff::Delete, QString("KLMNO"))
<< Diff(Diff::Insert, QString("PQRST")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABC"))
<< Diff(Diff::Insert, QString("DEF"))
<< Diff(Diff::Equal, QString("GHIJ"))
<< Diff(Diff::Delete, QString("KLMNO"))
<< Diff(Diff::Insert, QString("PQRST")));
QTest::newRow("Simple cleanup")
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("A"))
<< Diff(Diff::Equal, QString("B"))
<< Diff(Diff::Delete, QString("C")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABC"))
<< Diff(Diff::Insert, QString("B")));
QTest::newRow("Backward cleanup")
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("AB"))
<< Diff(Diff::Equal, QString("CD"))
<< Diff(Diff::Delete, QString("E"))
<< Diff(Diff::Equal, QString("F"))
<< Diff(Diff::Insert, QString("G")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABCDEF"))
<< Diff(Diff::Insert, QString("CDFG")));
QTest::newRow("Multi cleanup")
<< (QList<Diff>()
<< Diff(Diff::Insert, QString("A"))
<< Diff(Diff::Equal, QString("B"))
<< Diff(Diff::Delete, QString("C"))
<< Diff(Diff::Insert, QString("D"))
<< Diff(Diff::Equal, QString("E"))
<< Diff(Diff::Insert, QString("F"))
<< Diff(Diff::Equal, QString("G"))
<< Diff(Diff::Delete, QString("H"))
<< Diff(Diff::Insert, QString("I")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("BCEGH"))
<< Diff(Diff::Insert, QString("ABDEFGI")));
QTest::newRow("Fraser's example")
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("H"))
<< Diff(Diff::Insert, QString("My g"))
<< Diff(Diff::Equal, QString("over"))
<< Diff(Diff::Delete, QString("i"))
<< Diff(Diff::Equal, QString("n"))
<< Diff(Diff::Delete, QString("g"))
<< Diff(Diff::Insert, QString("ment")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("Hovering"))
<< Diff(Diff::Insert, QString("My government")));
QTest::newRow("Overlap keep without equality")
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABCXXX"))
<< Diff(Diff::Insert, QString("XXXDEF")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABCXXX"))
<< Diff(Diff::Insert, QString("XXXDEF")));
QTest::newRow("Overlap remove equality")
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABC"))
<< Diff(Diff::Equal, QString("XXX"))
<< Diff(Diff::Insert, QString("DEF")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABCXXX"))
<< Diff(Diff::Insert, QString("XXXDEF")));
QTest::newRow("Overlap add equality")
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABCXXXX"))
<< Diff(Diff::Insert, QString("XXXXDEF")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABC"))
<< Diff(Diff::Equal, QString("XXXX"))
<< Diff(Diff::Insert, QString("DEF")));
QTest::newRow("Overlap keep equality")
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABC"))
<< Diff(Diff::Equal, QString("XXXX"))
<< Diff(Diff::Insert, QString("DEF")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABC"))
<< Diff(Diff::Equal, QString("XXXX"))
<< Diff(Diff::Insert, QString("DEF")));
QTest::newRow("Reverse overlap keep without equality")
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("XXXABC"))
<< Diff(Diff::Insert, QString("DEFXXX")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("XXXABC"))
<< Diff(Diff::Insert, QString("DEFXXX")));
QTest::newRow("Reverse overlap remove equality")
<< (QList<Diff>()
<< Diff(Diff::Insert, QString("ABC"))
<< Diff(Diff::Equal, QString("XXX"))
<< Diff(Diff::Delete, QString("DEF")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("XXXDEF"))
<< Diff(Diff::Insert, QString("ABCXXX")));
QTest::newRow("Reverse overlap add equality")
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("XXXXABC"))
<< Diff(Diff::Insert, QString("DEFXXXX")))
<< (QList<Diff>()
<< Diff(Diff::Insert, QString("DEF"))
<< Diff(Diff::Equal, QString("XXXX"))
<< Diff(Diff::Delete, QString("ABC")));
QTest::newRow("Reverse overlap keep equality")
<< (QList<Diff>()
<< Diff(Diff::Insert, QString("ABC"))
<< Diff(Diff::Equal, QString("XXXX"))
<< Diff(Diff::Delete, QString("DEF")))
<< (QList<Diff>()
<< Diff(Diff::Insert, QString("ABC"))
<< Diff(Diff::Equal, QString("XXXX"))
<< Diff(Diff::Delete, QString("DEF")));
QTest::newRow("Two overlaps")
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABCDEFG"))
<< Diff(Diff::Insert, QString("DEFGHIJKLM"))
<< Diff(Diff::Equal, QString("NOPQR"))
<< Diff(Diff::Delete, QString("STU"))
<< Diff(Diff::Insert, QString("TUVW")))
<< (QList<Diff>()
<< Diff(Diff::Delete, QString("ABC"))
<< Diff(Diff::Equal, QString("DEFG"))
<< Diff(Diff::Insert, QString("HIJKLM"))
<< Diff(Diff::Equal, QString("NOPQR"))
<< Diff(Diff::Delete, QString("S"))
<< Diff(Diff::Equal, QString("TU"))
<< Diff(Diff::Insert, QString("VW")));
}
void tst_Differ::cleanupSemantics()
{
QFETCH(QList<Diff>, input);
QFETCH(QList<Diff>, expected);
Differ differ;
QList<Diff> result = differ.cleanupSemantics(input);
QCOMPARE(result, expected);
}
QTEST_MAIN(tst_Differ)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment