一個簡單字符串差異對比暴力算法實現

  如題:請求出兩個字符串的差異部分,並以不同的顏色區分顯示到瀏覽器上。

 

1. 解題思路

1. 找出兩字符串中相同的部分,標記;

2. 找出兩字符串中不同的部分,標記;

3. 儘可能長的匹配相同部分;

4. 儘可能降低算法複雜度(所有算法的重要目標);

 

2. 算法實現

算法實現如下:(js實現)

<!DOCTYPE html>
<!DOCTYPE html>
<html>
<head>
    <title>diff function test</title>
    <script type="text/javascript">
        // Approach 1: brute-force solution using two pointers
        // 1. First scan a until an element equal to b[j] is found; keep the scanned elements as aQueue
        // 2. Then scan b, searching within aQueue each time
        // 3. If a match is found, continue with the longest match from there
        // 4. If no match is found, let a take the longest match
        // 5. Move on to the next round, until a has been fully traversed
        // Time complexity (author's estimate): a:m, b:n, a1:m, b1:m(m+1)/2*n, ∴ O(m²n)
        // Log flag read by printLog: messages are written to the document only while it equals 1.
        var logSwitch = 1;
        /**
         * Computes a character-level diff of two strings with a greedy
         * two-pointer scan. Whitespace is compared like any other character.
         *
         * Both inputs are split with split("") (UTF-16 code units) and walked
         * in parallel. Every character of `a` ends up in exactly one segment of
         * the returned `a` list and every character of `b` in exactly one
         * segment of the `b` list, in order, so joining the segments of a side
         * reproduces that input.
         *
         * @param {string} a - Text shown on the A side.
         * @param {string} b - Text shown on the B side.
         * @returns {{a: Array<{item: string[], type: string}>,
         *            b: Array<{item: string[], type: string}>}}
         *          Segment lists for both sides; `type` is "equal" or "diff".
         *          An "equal" segment's `item` array is shared by both sides.
         */
        function diffWithWhitespace(a, b) {
            var aValues = a.split("");
            var bValues = b.split("");
            var alen = aValues.length;
            var blen = bValues.length;
            var i = 0, j = 0;
            var aResult = [];
            var bResult = [];
            // Equal run currently open at the tail of BOTH results, or null
            // when the next equal character has to start a new segment.
            var openEqualRun = null;

            // Appends one matched character, opening a new shared "equal"
            // segment on both sides when none is open.
            function recordEqual(ch) {
                if (openEqualRun === null) {
                    openEqualRun = [];
                    aResult.push({"item": openEqualRun, "type": "equal"});
                    bResult.push({"item": openEqualRun, "type": "equal"});
                }
                openEqualRun.push(ch);
                printLog("equal:<span style=\"background:#ffea00\">" + ch + "</span><br />");
            }

            // Appends a non-empty "diff" segment to one side and closes the
            // open equal run, so later matches are placed after this diff.
            function recordDiff(result, items, label, terminator) {
                if (items.length === 0) {
                    return;
                }
                result.push({"item": items, "type": "diff"});
                openEqualRun = null;
                if (label) {
                    printLog(label + items + terminator);
                }
            }

            while (i < alen || j < blen) {
                if (i < alen && j < blen && aValues[i] === bValues[j]) {
                    recordEqual(aValues[i]);
                    i++;
                    j++;
                    continue;
                }
                if (j >= blen) {
                    // b is exhausted: every leftover character of a is a diff.
                    recordDiff(aResult, [aValues[i]]);
                    i++;
                    continue;
                }

                // Find the first position i2 >= i in a that equals b[j].
                var i2 = i;
                while (i2 < alen && aValues[i2] !== bValues[j]) {
                    i2++;
                }
                if (i2 >= alen) {
                    // b[j] does not occur in the rest of a: emit it, together
                    // with the run of identical characters that follows it,
                    // as a diff on the B side.
                    var bRun = [bValues[j]];
                    var jEnd = j + 1;
                    while (jEnd < blen && bValues[jEnd] === bValues[j]) {
                        bRun.push(bValues[jEnd]);
                        jEnd++;
                    }
                    recordDiff(bResult, bRun, "bdiff:", "<br />");
                    j = jEnd;
                    continue;
                }

                // a[i2] matches b[j] at the cost of skipping a[i..i2-1].
                // Before accepting that, look ahead in b (at most windowLen
                // characters) for a character that also occurs in the window
                // a[i..i2]; the first such pair wins.
                var windowLen = i2 - i + 1;
                var hitA = -1;
                var hitB = -1;
                for (var j2 = j + 1; j2 < blen && j2 <= j + windowLen && hitA < 0; j2++) {
                    for (var k = 0; k < windowLen; k++) {
                        if (bValues[j2] === aValues[i + k]) {
                            hitA = i + k;
                            hitB = j2;
                            break;
                        }
                    }
                }

                if (hitA >= 0) {
                    // Everything before the matched pair is a diff on its side.
                    recordDiff(aResult, aValues.slice(i, hitA), "adiff:", "<p>");
                    recordDiff(bResult, bValues.slice(j, hitB), "bdiff:", "<p>");
                    recordEqual(bValues[hitB]);
                    i = hitA + 1;
                    j = hitB + 1;
                }
                else {
                    // No alternative found: keep the a[i2] == b[j] match, so
                    // only a[i..i2-1] is a diff and b advances by one.
                    recordDiff(aResult, aValues.slice(i, i2), "adiff:", "<p>");
                    recordEqual(aValues[i2]);
                    i = i2 + 1;
                    j = j + 1;
                }
            }
            return {"a": aResult, "b": bResult};
        }
        // Disable log output before running the demos.
        turnOffLogSwitch();
        // Demo 1: two short Chinese sentences that contain spaces.
        var aText = "今天 是個 好天氣";
        var bText = "今天, 真是 一個 好陰天啊";
        diffWithWhitespaceAndAppendBody(aText, bText);

        // Demo 2: two identifiers sharing the suffix "Exception".
        aText = "ParseException";
        bText = "RuntimeException";
        diffWithWhitespaceAndAppendBody(aText, bText);


        /**
         * Diffs two strings and writes both annotated sides into the document.
         *
         * The inputs are logged through printLog, diffed with
         * diffWithWhitespace, and every resulting segment is rendered with
         * writeDiffResult: first the A side, then the B side.
         *
         * @param {string} a - Text for the A side.
         * @param {string} b - Text for the B side.
         */
        function diffWithWhitespaceAndAppendBody(a, b) {
            // Use the parameters rather than script-level variables, so the
            // function diffs exactly what the caller passed in.
            printLog(a);
            printLog("<p>");
            printLog(b);
            printLog("<p>compare result: <p>");
            var diffResult = diffWithWhitespace(a, b);
            document.write("<p>A SIDE: <p>");
            diffResult.a.forEach(function (r) {
                writeDiffResult(r);
            });
            document.write("<p>B SIDE: <p>");
            diffResult.b.forEach(function (r) {
                writeDiffResult(r);
            });
        }
        /**
         * Writes one diff segment into the document.
         *
         * The segment's characters are joined and HTML-escaped, so text that
         * contains `&`, `<` or `>` is displayed literally instead of being
         * parsed as markup. Segments whose type is not "equal" are wrapped in
         * a highlighted span.
         *
         * @param {{item: string[], type: string}} structText - Segment to render.
         */
        function writeDiffResult(structText) {
            var item = structText.item.join("")
                .replace(/&/g, "&amp;")
                .replace(/</g, "&lt;")
                .replace(/>/g, "&gt;");
            if(structText.type != "equal") {
                item = "<span style=\"background:#ffea00\">" + item + "</span>";
            }
            document.write(item);
        }
        /**
         * Writes a log message into the document while logging is switched on.
         *
         * @param {string} msg - Markup to write when logSwitch equals 1.
         */
        function printLog(msg) {
            var loggingEnabled = (logSwitch == 1);
            if (!loggingEnabled) {
                return;
            }
            document.write(msg);
        }
        /**
         * Enables log output by setting logSwitch to 1, the value printLog checks for.
         */
        function turnOnLogSwitch() {
            logSwitch = 1;
        }
        /**
         * Disables log output by setting logSwitch to 0, so printLog writes nothing.
         */
        function turnOffLogSwitch() {
            logSwitch = 0;
        }
    </script>
</head>
<body>

</body>
</html>
<!DOCTYPE html>
<!DOCTYPE html>
<html>
<head>
    <title>diff function test</title>
    <script type="text/javascript">
        // Approach 1: brute-force solution using two pointers
        // 1. First scan a until an element equal to b[j] is found; keep the scanned elements as aQueue
        // 2. Then scan b, searching within aQueue each time
        // 3. If a match is found, continue with the longest match from there
        // 4. If no match is found, let a take the longest match
        // 5. Move on to the next round, until a has been fully traversed
        // Time complexity (author's estimate): a:m, b:n, a1:m, b1:m(m+1)/2*n, ∴ O(m²n)
        // Log flag read by printLog: messages are written to the document only while it equals 1.
        var logSwitch = 1;
        /**
         * Computes a character-level diff of two strings with a greedy
         * two-pointer scan. Whitespace is compared like any other character.
         *
         * Both inputs are split with split("") (UTF-16 code units) and walked
         * in parallel. Every character of `a` ends up in exactly one segment of
         * the returned `a` list and every character of `b` in exactly one
         * segment of the `b` list, in order, so joining the segments of a side
         * reproduces that input.
         *
         * @param {string} a - Text shown on the A side.
         * @param {string} b - Text shown on the B side.
         * @returns {{a: Array<{item: string[], type: string}>,
         *            b: Array<{item: string[], type: string}>}}
         *          Segment lists for both sides; `type` is "equal" or "diff".
         *          An "equal" segment's `item` array is shared by both sides.
         */
        function diffWithWhitespace(a, b) {
            var aValues = a.split("");
            var bValues = b.split("");
            var alen = aValues.length;
            var blen = bValues.length;
            var i = 0, j = 0;
            var aResult = [];
            var bResult = [];
            // Equal run currently open at the tail of BOTH results, or null
            // when the next equal character has to start a new segment.
            var openEqualRun = null;

            // Appends one matched character, opening a new shared "equal"
            // segment on both sides when none is open.
            function recordEqual(ch) {
                if (openEqualRun === null) {
                    openEqualRun = [];
                    aResult.push({"item": openEqualRun, "type": "equal"});
                    bResult.push({"item": openEqualRun, "type": "equal"});
                }
                openEqualRun.push(ch);
                printLog("equal:<span style=\"background:#ffea00\">" + ch + "</span><br />");
            }

            // Appends a non-empty "diff" segment to one side and closes the
            // open equal run, so later matches are placed after this diff.
            function recordDiff(result, items, label, terminator) {
                if (items.length === 0) {
                    return;
                }
                result.push({"item": items, "type": "diff"});
                openEqualRun = null;
                if (label) {
                    printLog(label + items + terminator);
                }
            }

            while (i < alen || j < blen) {
                if (i < alen && j < blen && aValues[i] === bValues[j]) {
                    recordEqual(aValues[i]);
                    i++;
                    j++;
                    continue;
                }
                if (j >= blen) {
                    // b is exhausted: every leftover character of a is a diff.
                    recordDiff(aResult, [aValues[i]]);
                    i++;
                    continue;
                }

                // Find the first position i2 >= i in a that equals b[j].
                var i2 = i;
                while (i2 < alen && aValues[i2] !== bValues[j]) {
                    i2++;
                }
                if (i2 >= alen) {
                    // b[j] does not occur in the rest of a: emit it, together
                    // with the run of identical characters that follows it,
                    // as a diff on the B side.
                    var bRun = [bValues[j]];
                    var jEnd = j + 1;
                    while (jEnd < blen && bValues[jEnd] === bValues[j]) {
                        bRun.push(bValues[jEnd]);
                        jEnd++;
                    }
                    recordDiff(bResult, bRun, "bdiff:", "<br />");
                    j = jEnd;
                    continue;
                }

                // a[i2] matches b[j] at the cost of skipping a[i..i2-1].
                // Before accepting that, look ahead in b (at most windowLen
                // characters) for a character that also occurs in the window
                // a[i..i2]; the first such pair wins.
                var windowLen = i2 - i + 1;
                var hitA = -1;
                var hitB = -1;
                for (var j2 = j + 1; j2 < blen && j2 <= j + windowLen && hitA < 0; j2++) {
                    for (var k = 0; k < windowLen; k++) {
                        if (bValues[j2] === aValues[i + k]) {
                            hitA = i + k;
                            hitB = j2;
                            break;
                        }
                    }
                }

                if (hitA >= 0) {
                    // Everything before the matched pair is a diff on its side.
                    recordDiff(aResult, aValues.slice(i, hitA), "adiff:", "<p>");
                    recordDiff(bResult, bValues.slice(j, hitB), "bdiff:", "<p>");
                    recordEqual(bValues[hitB]);
                    i = hitA + 1;
                    j = hitB + 1;
                }
                else {
                    // No alternative found: keep the a[i2] == b[j] match, so
                    // only a[i..i2-1] is a diff and b advances by one.
                    recordDiff(aResult, aValues.slice(i, i2), "adiff:", "<p>");
                    recordEqual(aValues[i2]);
                    i = i2 + 1;
                    j = j + 1;
                }
            }
            return {"a": aResult, "b": bResult};
        }
        // Disable log output before running the demos.
        turnOffLogSwitch();
        // Demo 1: two short Chinese sentences that contain spaces.
        var aText = "今天 是個 好天氣";
        var bText = "今天, 真是 一個 好陰天啊";
        diffWithWhitespaceAndAppendBody(aText, bText);

        // Demo 2: two identifiers sharing the suffix "Exception".
        aText = "ParseException";
        bText = "RuntimeException";
        diffWithWhitespaceAndAppendBody(aText, bText);


        /**
         * Diffs two strings and writes both annotated sides into the document.
         *
         * The inputs are logged through printLog, diffed with
         * diffWithWhitespace, and every resulting segment is rendered with
         * writeDiffResult: first the A side, then the B side.
         *
         * @param {string} a - Text for the A side.
         * @param {string} b - Text for the B side.
         */
        function diffWithWhitespaceAndAppendBody(a, b) {
            // Use the parameters rather than script-level variables, so the
            // function diffs exactly what the caller passed in.
            printLog(a);
            printLog("<p>");
            printLog(b);
            printLog("<p>compare result: <p>");
            var diffResult = diffWithWhitespace(a, b);
            document.write("<p>A SIDE: <p>");
            diffResult.a.forEach(function (r) {
                writeDiffResult(r);
            });
            document.write("<p>B SIDE: <p>");
            diffResult.b.forEach(function (r) {
                writeDiffResult(r);
            });
        }
        /**
         * Writes one diff segment into the document.
         *
         * The segment's characters are joined and HTML-escaped, so text that
         * contains `&`, `<` or `>` is displayed literally instead of being
         * parsed as markup. Segments whose type is not "equal" are wrapped in
         * a highlighted span.
         *
         * @param {{item: string[], type: string}} structText - Segment to render.
         */
        function writeDiffResult(structText) {
            var item = structText.item.join("")
                .replace(/&/g, "&amp;")
                .replace(/</g, "&lt;")
                .replace(/>/g, "&gt;");
            if(structText.type != "equal") {
                item = "<span style=\"background:#ffea00\">" + item + "</span>";
            }
            document.write(item);
        }
        /**
         * Writes a log message into the document while logging is switched on.
         *
         * @param {string} msg - Markup to write when logSwitch equals 1.
         */
        function printLog(msg) {
            var loggingEnabled = (logSwitch == 1);
            if (!loggingEnabled) {
                return;
            }
            document.write(msg);
        }
        /**
         * Enables log output by setting logSwitch to 1, the value printLog checks for.
         */
        function turnOnLogSwitch() {
            logSwitch = 1;
        }
        /**
         * Disables log output by setting logSwitch to 0, so printLog writes nothing.
         */
        function turnOffLogSwitch() {
            logSwitch = 0;
        }
    </script>
</head>
<body>

</body>
</html>

  算法屬於暴力解法,簡單使用了雙指針法,沒有太多技巧,需要進一步優化。

 

3. 一點閒話

      需要注意的是,雖然樣子很像最長公共子序列的命題,但卻並不是一回事。供參考。

      與 Beyond Compare 軟件的結果相比,還是不太準確,最長匹配這個原則還沒有體現好。另外,對於多行型的字符串比較,並沒有給出參考,但一般而言,多行會被當作整體處理,行與行之間都有單字符類的比較。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章