基於Predictive Parsing的ABNF語法分析器(八)——AbnfParser文法解析器之帶星號的情形(如char-val, prose-val)

帶星號*表示重複的次數,例如*A表示A可以重複0至任意多次,*3A表示A可以重複0次、1次、2次或3次,4*A表示A至少要重複4次。我們先來看最簡單的情形*A,ABNF的語法定義中char-val和prose-val都屬於這種。

/*
    This file is one of the components of a Context-free Grammar Parser Generator,
    which accepts a piece of text as the input, and generates a parser
    for the input context-free grammar.
    Copyright (C) 2013, Junbiao Pan (Email: [email protected])

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

//		        char-val       =  DQUOTE *(%x20-21 / %x23-7E) DQUOTE
//  DQUOTE         =  %x22
	/**
	 * Parses one {@code char-val} element from the input stream.
	 *
	 * <p>Grammar: {@code char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE}, where
	 * {@code DQUOTE = %x22}.
	 *
	 * <p>The opening quote and every accepted character are consumed as they are
	 * read; nothing in this method pushes them back if the closing quote turns out
	 * to be missing.
	 *
	 * @return a {@code CharVal} built from the characters between the two double
	 *         quotes (the quotes themselves are not included)
	 * @throws IOException declared for the underlying stream operations
	 * @throws MatchException declared for the {@code assertMatch} checks on the
	 *         opening and closing double quote
	 */
	protected CharVal char_val() throws IOException, MatchException {
		// Accumulate in a StringBuilder: repeated String concatenation inside the
		// loop would copy the whole prefix again for every character read.
		StringBuilder content = new StringBuilder();

		// A char-val must start with a double quote (0x22); consume it.
		assertMatch(is.peek(), 0x22);
		is.read();

		// Everything in 0x20-0x21 or 0x23-0x7E (i.e. printable characters except
		// the double quote itself) belongs to the quoted string.
		while (match(is.peek(), 0x20, 0x21) || match(is.peek(), 0x23, 0x7E)) {
			content.append((char) is.read());
		}

		// The first character outside those ranges has to be the closing quote.
		assertMatch(is.peek(), 0x22);
		is.read();

		return new CharVal(content.toString());
	}

    //		        char-val       =  DQUOTE *(%x20-21 / %x23-7E) DQUOTE
//  DQUOTE         =  %x22
    @Test
    public void testChar_val() throws Exception {
        // Adapter that lets the shared exception-checking helper invoke char_val().
        Tester<CharVal> charValTester = new Tester<CharVal>() {
            @Override
            public CharVal test(AbnfParser parser) throws MatchException, IOException {
                return parser.char_val();
            }
        };

        // Quoted contents that must be returned unchanged: the empty string, each
        // boundary character of the two allowed ranges (alone and doubled), and a
        // long mixed string.
        String mixed = "AbCd1234#$%^~!@#$%^&*()`-=_+[]\\{}|,./<>?;:'";
        String[] contents = {
            "",
            String.valueOf((char) 0x20),
            String.valueOf((char) 0x21),
            String.valueOf((char) 0x23),
            String.valueOf((char) 0x7E),
            String.valueOf((char) 0x20) + String.valueOf((char) 0x20),
            String.valueOf((char) 0x21) + String.valueOf((char) 0x21),
            String.valueOf((char) 0x23) + String.valueOf((char) 0x23),
            String.valueOf((char) 0x7E) + String.valueOf((char) 0x7E),
            mixed,
        };
        for (String content : contents) {
            String quoted = "\"" + content + "\"";
            Assert.assertEquals(content, AbnfParserFactory.newInstance(quoted).char_val().toString());
        }

        // Anything after the closing quote must be left alone.
        Assert.assertEquals(mixed, AbnfParserFactory.newInstance("\"" + mixed + "\"A").char_val().toString());
        Assert.assertEquals(mixed, AbnfParserFactory.newInstance("\"" + mixed + "\"\"").char_val().toString());

        // Inputs that do not even start with a double quote.
        Assertion.assertMatchException("", charValTester, 1, 1);
        Assertion.assertMatchException("" + (char) 0x19, charValTester, 1, 1);

        // These cases expose a shortcoming of the parser: when the input opens
        // with a double quote but never closes it, char_val() throws an exception
        // (which is acceptable), but the read pointer cannot go back to the
        // position before the opening quote, i.e. the algorithm does not support
        // backtracking.
        Assertion.assertMatchException("\"", charValTester, 2, 1);
        Assertion.assertMatchException("\"a", charValTester, 3, 1);

        Assertion.assertMatchException("B", charValTester, 1, 1);
    }


//		        prose-val      =  "<" *(%x20-3D / %x3F-7E) ">"
//      prose_val()方法和char_val()方法很類似,請自行閱讀
	/**
	 * Parses one {@code prose-val} element from the input stream.
	 *
	 * <p>Grammar: {@code prose-val = "<" *(%x20-3D / %x3F-7E) ">"}.
	 *
	 * <p>The opening bracket and every accepted character are consumed as they
	 * are read; nothing in this method pushes them back if the closing bracket
	 * turns out to be missing.
	 *
	 * @return a {@code ProseVal} constructed from the characters read between
	 *         the angle brackets
	 * @throws IOException declared for the underlying stream operations
	 * @throws MatchException declared for the {@code assertMatch} checks on the
	 *         opening {@code '<'} and closing {@code '>'}
	 */
	protected ProseVal prose_val() throws IOException, MatchException {
		// Accumulate in a StringBuilder: repeated String concatenation inside the
		// loop would copy the whole prefix again for every character read.
		StringBuilder content = new StringBuilder();

		// A prose-val must start with '<'; consume it.
		assertMatch(is.peek(), '<');
		is.read();

		// Accept 0x20-0x3D and 0x3F-0x7E, i.e. printable characters except '>' (0x3E).
		while (match(is.peek(), 0x20, 0x3D) || match(is.peek(), 0x3F, 0x7E)) {
			content.append((char) is.read());
		}

		// The first character outside those ranges has to be the closing '>'.
		assertMatch(is.peek(), '>');
		is.read();

		return new ProseVal(content.toString());
	}

    //		        prose-val      =  "<" *(%x20-3D / %x3F-7E) ">"
    @Test
    public void testProse_val() throws Exception {
        // Adapter that lets the shared exception-checking helper invoke prose_val().
        Tester<String> proseValTester = new Tester<String>() {
            @Override
            public String test(AbnfParser parser) throws MatchException, IOException {
                return parser.prose_val().toString();
            }
        };

        // Well-formed inputs: empty brackets, each boundary character of the two
        // allowed ranges on its own, and all four boundaries together.
        char[][] wellFormed = {
            {'<', '>'},
            {'<', 0x20, '>'},
            {'<', 0x3D, '>'},
            {'<', 0x3F, '>'},
            {'<', 0x7E, '>'},
            {'<', 0x20, 0x3D, 0x3F, 0x7E, '>'},
        };
        for (char[] chars : wellFormed) {
            String text = String.valueOf(chars);
            Assert.assertEquals(text, AbnfParserFactory.newInstance(text).prose_val().toString());
        }

        // Parsing stops at the first '>'; the extra one is not part of the value.
        Assert.assertEquals("<>", AbnfParserFactory.newInstance("<>>").prose_val().toString());

        // Characters outside 0x20-0x7E are rejected where they occur.
        Assertion.assertMatchException("<" + (char) 0x19 + ">", proseValTester, 2, 1);
        Assertion.assertMatchException("<" + (char) 0x7F + ">", proseValTester, 2, 1);
        Assertion.assertMatchException("<" + (char) 0x20 + (char) 0x19 + ">", proseValTester, 3, 1);
    }



本系列文章索引:基於預測的ABNF文法分析器

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章