[C++] 無聊的人寫的LL分析和LR分析

模型抽取

一個模型`Model<term_t, uterm_t>`擁有兩個類型參數:終結符類型`term_t`和非終結符類型`uterm_t`,對應於文法 $G(N,T,P,S)$ 中的概念,定義如下:

/// In-memory description of a grammar G(N, T, P, S).
///
/// @tparam term_t   type used to identify terminal symbols
/// @tparam uterm_t  type used to identify non-terminal symbols
///
/// The class only stores the grammar data; it contains no parsing logic.
template<typename term_t = int32_t, typename uterm_t = int32_t>
class Model {
    friend class Processor;
public:
    using string = std::string;
    using strvec = std::vector<string>;
    using symbol_t = Symbol<term_t, uterm_t>;
    using model = Model<term_t, uterm_t>;
    /// Every symbol of the grammar (N union T), keyed by its name.
    std::map<string, symbol_t> sym_table;
    /// The productions P.
    std::vector<Production<symbol_t>> prods;
    /// The start symbol S.
    symbol_t begin_symbol;
};  // the terminating ';' is required after a class definition

其中`sym_table`對應 $N\cup T$,`prods`對應 $P$,`begin_symbol`對應 $S$。
這個模型類只負責將文件內的模型讀入到內存中,不負責具體的語法構建。

遞歸調用分析

遞歸調用分析寫出的代碼只針對具體一種語言,因此實用性不強。代碼如下:

/// Hand-written recursive-descent parser for the arithmetic grammar
///
///     E  -> T ED          ED -> (add | sub) T ED | epsilon
///     T  -> F TD          TD -> (mul | div) F TD | epsilon
///     F  -> lbr E rbr | num
///
/// @tparam token_t     token type produced by the source stream
/// @tparam Source      token stream; must support `stream >> token`
/// @tparam TokenTable  provides the token constants eof/add/sub/mul/div/lbr/rbr/num
///
/// Every parseX(rt) fills the children of the already-allocated node `rt`.
template<typename token_t, class Source, class TokenTable>
class RecursiveAParser {
    using istream = Source;
    using result_t = Result<token_t, UTerm>;
    using node_t = ASTNode<token_t, UTerm>;
    istream &ref;
    token_t token;

    // Result currently being built; only non-null while parse() is running.
    result_t *result = nullptr;
public:
    RecursiveAParser(istream &ref): ref(ref) {}

    /// Parses one expression from the stream.
    /// @return a newly allocated result (the caller owns it); `code` is
    ///         ResultCode::Error if a syntax error was reported, including
    ///         tokens left over after the expression.
    result_t* parse() {
        result = new result_t();
        result->code = ResultCode::Ok;
        auto hdl = result;
        read();
        parseE(result->rt = result->alloc(UTerm::E, true));
        // The whole input must be consumed: anything left after the top-level
        // expression (e.g. "num )") is a syntax error. Skip the report when
        // an error was already recorded so it is not printed twice.
        if (result->code == ResultCode::Ok && token != TokenTable::eof) {
            error();
        }
        result = nullptr;
        return hdl;
    }
private:
    /// Advances to the next token (one token of lookahead is kept in `token`).
    void read() {
        ref >> token;
    }
    /// Marks the result as failed and prints the offending token.
    void error() {
        result->code = ResultCode::Error;
        std::cout << "error " << token << std::endl;
    }
    /// E -> T ED   (the ED node is omitted when the input is exhausted)
    void parseE(node_t* &rt) {
        parseT(rt->insert(result->alloc(UTerm::T, true)));
        if (token != TokenTable::eof) {
            parseED(rt->insert(result->alloc(UTerm::ED, true)));
        }
    }
    /// ED -> (add | sub) T ED | epsilon
    void parseED(node_t* &rt) {
        if (token == TokenTable::add || token == TokenTable::sub) {
            rt->insert(result->alloc(token));
            read();
            parseT(rt->insert(result->alloc(UTerm::T, true)));
            parseED(rt->insert(result->alloc(UTerm::ED, true)));
        }
    }
    /// T -> F TD   (the TD node is omitted when the input is exhausted)
    void parseT(node_t* &rt) {
        parseF(rt->insert(result->alloc(UTerm::F, true)));
        if (token != TokenTable::eof) {
            parseTD(rt->insert(result->alloc(UTerm::TD, true)));
        }
    }
    /// TD -> (mul | div) F TD | epsilon
    void parseTD(node_t* &rt) {
        if (token == TokenTable::mul || token == TokenTable::div) {
            rt->insert(result->alloc(token));
            read();
            parseF(rt->insert(result->alloc(UTerm::F, true)));
            parseTD(rt->insert(result->alloc(UTerm::TD, true)));
        }
    }
    /// F -> lbr E rbr | num
    /// On an unexpected token the error is reported without consuming it.
    void parseF(node_t* &rt) {
        if (token == TokenTable::lbr) {
            rt->insert(result->alloc(token));
            read();
            parseE(rt->insert(result->alloc(UTerm::E, true)));
            if (token == TokenTable::rbr) {
                rt->insert(result->alloc(token));
                read();
            } else {
                error();
            }
        } else if (token == TokenTable::num) {
            rt->insert(result->alloc(token));
            read();
        } else {
            error();
        }
    }
};

`istream`是lexer的輸出流,經過遞歸函數組織以後生成語法樹。生成結果與樹節點的數據結構如下:

/// Outcome of a parse: the root of the produced tree plus a status code.
/// NOTE(review): the parsers also call `result->alloc(...)`, which is not
/// part of this listing — presumably elided here; confirm against the full source.
template<typename term_t, typename uterm_t>
struct Result {
    using node_t = ASTNode<term_t, uterm_t>;
    /// Root node of the tree.
    node_t *rt;
    /// Status of the parse (the parsers assign ResultCode::Ok / ResultCode::Error).
    ResultCode code;
};  // the terminating ';' is required after a struct definition

/// One node of the parse tree.
/// NOTE(review): the parsers also call `node->insert(child)`, which is not
/// part of this listing — presumably elided here.
template<typename term_t, typename uterm_t>
struct ASTNode {
    using symbol_t = Symbol<term_t, uterm_t>;

    /// Grammar symbol (terminal or non-terminal) this node stands for.
    symbol_t symbol;

    /// Direct children, in left-to-right order of insertion.
    std::vector<ASTNode<term_t, uterm_t>*> ch;
};

測試結果

( num + num / ( num + num * num ) )
{ code: Ok, node: {
    {ut,E}, {{{ut,T}, {{{ut,F}, {
        {{t,Lbr}, {}},
        {{ut,E}, {{{ut,T}, {
            {{ut,F}, {
            {{t,Num}, {}}, }},
            {{ut,TD}, {}}, }},
            {{ut,ED}, {
                {{t,Add}, {}},
                {{ut,T}, {{{ut,F},
                    {{{t,Num}, {}}, }}, {{ut,TD},
                    {{{t,Div}, {}},{{ut,F}, {
                        {{t,Lbr}, {}},
                        {{ut,E}, {{{ut,T}, {{{ut,F}, {
                        {{t,Num}, {}}, }}, {{ut,TD}, {}}, }}, {{ut,ED}, {
                            {{t,Add}, {}}, {{ut,T}, {{{ut,F}, {{    
                                {t,Num}, {}}, }}, {{ut,TD}, {
                                    {{t,Mul}, {}}, {{ut,F}, {
                                    {{t,Num}, {}}, }}, {{ut,TD}, {}}, }}, }},
                                    {{ut,ED}, {}}, }}, }},
                        {{t,Rbr}, {}}, }}, {{ut,TD}, {}}, }}, }},
                {{ut,ED}, {}}, }}, }},
        {{t,Rbr}, {}}, }}, }}, }}}

每個結點都以{{t/ut, symbol}, {ch1, ch2, ...}}表示,其中t表示它是個終結符結點,ut表示它是個非終結符結點,symbol是以字符串表示的符號,ch1、ch2…是它的第一個直接兒子、第二個直接兒子…。因爲這並不是抽取非終結符以後的抽象語法樹,所以結果非常雜亂,但它保存了完整的推導過程。

輔助函數

在介紹LL(1)分析和LR(1)之前,先給出幾個計算函數,這是所有語法通用的算法函數。

求first集函數

求First集合的僞算法如下:

for symbol in Symbols do
    let First[symbol] = { symbol } if symbol is term else {}
end
do 
    for symbol in uterm Symbols do
        let F be First[symbol]
        for production in Productions do
            let production be A -> X1 X2 ... Xn where A = symbol
            for i in 1 ... n do
                F = F merge (First[X1 X2 ... Xi] - {epsilon})
                if epsilon not in First[X1 X2 ... Xi] then
                    break
                end
            end
            if epsilon in First[X1 X2 ... Xn] then
                F = F merge {epsilon}
            end
        end
    end
while any First[symbol] changed

實現代碼如下:

/// Computes the First set of every symbol in `g.sym_table` by fixed-point
/// iteration and stores it in `g.first`.
///
/// @tparam Grammar         provides symbol_t, sym_table, prods and first
/// @tparam grammar_traits  provides the `epsilon` symbol
/// @param  g               grammar to update; each `g.first[sym]` is replaced
///                         by a freshly `new`-allocated set
template<class Grammar, class grammar_traits>
void calculate_first_fixed_point(Grammar &g) {
    using symbol_t = typename Grammar::symbol_t;
    using symbol_set = std::set<symbol_t>;

    // Seed: First(a) = {a} for a terminal, empty for a non-terminal.
    for (auto &entry : g.sym_table) {
        auto &symbol = entry.second;
        auto seed = new symbol_set();
        if (!symbol.is_unterm()) {
            seed->insert(symbol);
        }
        g.first[symbol] = seed;
    }

    // Repeat until a full pass grows no set.
    for (bool grew = true; grew; ) {
        grew = false;
        for (auto &slot : g.first) {
            auto &head = slot.first;
            if (!head.is_unterm()) {
                continue;
            }
            auto acc = slot.second;
            const size_t before = acc->size();
            for (auto &prod : g.prods) {
                if (!(prod.lhs == head)) {
                    continue;
                }
                // Stays true only while every RHS symbol seen so far can
                // derive epsilon.
                bool nullable = true;
                for (auto &rsym : prod.rhs) {
                    auto &rfirst = *g.first[rsym];
                    const bool has_eps = rfirst.count(grammar_traits::epsilon) != 0;
                    // Epsilon copied in from this symbol must not stay in the
                    // result, unless the result already contained it.
                    const bool strip = has_eps && !acc->count(grammar_traits::epsilon);
                    acc->insert(rfirst.begin(), rfirst.end());
                    if (strip) {
                        acc->erase(grammar_traits::epsilon);
                    }
                    if (!has_eps) {
                        nullable = false;
                        break;
                    }
                }
                if (nullable) {
                    acc->insert(grammar_traits::epsilon);
                }
            }
            if (acc->size() != before) {
                grew = true;
            }
        }
    }
}

求Follow集函數

求Follow集合的僞算法如下:

for symbol in Symbols do
    let Follow[symbol] = { $ } if symbol is begin symbol else {}
end
do
    for production in Productions do
        let production be A -> X1 X2 ... Xn
        for i in 1 ... n do
            Follow[Xi] = Follow[Xi] merge (First[Xi+1 ... Xn] - {epsilon})
        end
        let production be any A -> X B Y
        if epsilon in First[Y] then
            Follow[B] = Follow[B] merge Follow[A]
        end
    end
while any Follow[symbol] changed

實現代碼如下:

/// Computes the Follow set of every symbol in `g.sym_table` by fixed-point
/// iteration and stores it in `g.follow`.
///
/// Reads `g.first`, so the First sets must be in place before this is called.
///
/// @tparam Grammar         provides symbol_t, sym_table, prods, begin_symbol,
///                         first and follow
/// @tparam grammar_traits  provides the `epsilon` and `dollar` symbols
/// @param  g               grammar to update; each `g.follow[sym]` is replaced
///                         by a freshly `new`-allocated set
template<class Grammar, class grammar_traits>
void calculate_follow_fixed_point(Grammar &g) {
	using symbol_t = typename Grammar::symbol_t;
	using symbol_set = std::set<symbol_t>;

	// Seed: Follow(S) = {$} for the start symbol, empty for everything else.
	auto &start = g.begin_symbol;
	for (auto &entry : g.sym_table) {
		auto &symbol = entry.second;
		auto seed = new symbol_set();
		if (symbol == start) {
			seed->insert(grammar_traits::dollar);
		}
		g.follow[symbol] = seed;
	}

	// Repeat until a full pass over the productions grows no set.
	for (bool grew = true; grew; ) {
		grew = false;
		for (auto &prod : g.prods) {
			auto &body = prod.rhs;
			// Symbols that may follow the position being visited; starts as a
			// snapshot of Follow(lhs) for the right end of the production.
			symbol_set trailing(*g.follow[prod.lhs]);
			// Walk the right-hand side from its last symbol to its first.
			for (auto pos = body.size(); pos-- > 0; ) {
				auto &rsym = body[pos];
				if (rsym.is_unterm()) {
					auto &target = *g.follow[rsym];
					const auto before = target.size();
					target.insert(trailing.begin(), trailing.end());
					if (target.size() != before) {
						grew = true;
					}
				}
				auto &rfirst = *g.first[rsym];
				// A non-nullable symbol hides everything to its right.
				if (!rfirst.count(grammar_traits::epsilon)) {
					trailing.clear();
				}
				trailing.insert(rfirst.begin(), rfirst.end());
				trailing.erase(grammar_traits::epsilon);
			}
		}
	}
}

LL(1)分析法

c++模型

LL(1)語法的結構體如下:

/// LL(1) grammar: grammar data, First/Follow sets and the predictive parsing
/// table, combined with a parsing policy (the driver) through inheritance.
///
/// @tparam grammar_traits  supplies the model/string/symbol/production types
/// @tparam Policy          parser driver base class; defaults to BasicLLGrammar
template<class grammar_traits, class Policy=BasicLLGrammar<grammar_traits>>
class LL1Grammar : public Policy {
public:
	using model_t = typename grammar_traits::model_t;
	using string = typename grammar_traits::string;
	using strvec = typename grammar_traits::strvec;
	using symbol_t = typename grammar_traits::symbol_t;
	using production_t = typename grammar_traits::production_t;

	// Must name this exact instantiation. Omitting Policy would silently name
	// LL1Grammar<grammar_traits, BasicLLGrammar<grammar_traits>>, which is a
	// different type from *this whenever a custom policy is supplied.
	using grammar_t = LL1Grammar<grammar_traits, Policy>;
private:
	// ... the same members as Model (elided in this listing)

	// First / Follow set of every symbol.
	std::map<symbol_t, std::set<symbol_t>* > first;
	std::map<symbol_t, std::set<symbol_t>* > follow;

	// table[A][a] is the action to take with non-terminal A on top of the
	// stack and lookahead a.
	using action_map = std::map<symbol_t, action_space::action*>;
	std::map<symbol_t, action_map*> table;
};

根據First集和Follow集構造LL(1)分析表

LL(1)語法構造的僞算法如下:

for production in Productions do
    let production be A -> B
    for x in First[B] do
        Action[A, x] = use production A -> B
    end
    if epsilon in First[B] then
        for x in Follow[A] do
            Action[A, x] = use production A -> B
        end
    end
end

abort if any Action[A, x] have conflict items

對應C++算法如下:

// Build the LL(1) table: start with one empty action row per symbol.
for (auto &c : sym_table) {
    table[c.second] = new action_map();
}
std::set<symbol_t> mset;
for (auto &prod : prods) {
    // NOTE(review): presumably fills mset with the lookahead symbols that
    // select `prod`; confirm that it also clears mset between productions.
    get_first_follow1<grammar_t, grammar_traits>(*this, prod, mset);
    auto &acmp = *table[prod.lhs];
    for (auto &sym : mset) {
        if (acmp.count(sym)) {
            // Two productions claim the same cell: the grammar is not LL(1).
            // The prefix is streamed in rather than passed to the constructor:
            // a stringstream constructed from a string keeps its put position
            // at offset 0, so the following writes would overwrite "conflict ".
            std::stringstream s;
            s << "conflict ";
            print::print("prod:", false, s);
            print::print(prod, false, s);
            print::print("mset:", false, s);
            print::print(mset, false, s);
            print::print("symbol:", false, s);
            print::print(sym, false, s);
            throw std::logic_error(s.str());
        }
        action_space::action *a =
            new action_space::replace_action1<symbol_t>(prod.lhs, prod.rhs);
        acmp[sym] = a;
    }
}

設計通用的LL語法分析器

注意BasicLLGrammar是默認的LL語法分析器,其對應語法推導過程,僞算法如下:

let stack = [<begin symbol, new node> as <Symbol, AstRoot>]
for symbol flow from LexerResult do
    if Top[stack].Symbol is term then
        if Top[stack] not equal to symbol then
            error()
        end
        else
            Pop(stack)
            consume this symbol
        end
    end
    else
        let action be Action[Top[stack].Symbol, symbol]
        if action exists then
            let action be A -> B
            let AstTree be Top[stack].AstRoot
            alloc node ch1, ch2, ... for B
            let ch1, ch2, ... be AstTree's children
            Pop(stack)
            Push(stack, reverse(B))
            remain this symbol in flow
        else
            error()
        end
    end
end
output result

對應C++代碼如下:

/// Table-driven LL parse of the symbols read from `is`.
///
/// Keeps a stack of tree nodes still to be matched. A non-terminal on top is
/// expanded using the table entry for the lookahead; a terminal on top is
/// matched against the lookahead. Errors are counted and recovered from in
/// panic mode, so the loop always either consumes input or pops the stack.
///
/// @param is  symbol source, consumed through read(is, next_symbol)
/// @return    newly allocated result (the caller owns it); `code` is set to
///            ResultCode::Error when at least one error was counted
template<class IStream>
result_t *work(IStream &is) {
    reset();
    auto result = new result_t();
    auto rt = result->alloc(begin_symbol);
    result->rt = rt;
    stack.push(rt);
    read(is, next_symbol);
    while (stack.size()) {
        if (next_symbol == grammar_traits::eof) {
            break;
        }
        auto state = stack.top();
        if (state->symbol.is_unterm()) {
            auto &acmp = *(*table)[state->symbol];
            if (acmp.count(next_symbol)) {
                auto entry = acmp[next_symbol];
                if (auto d0 = dynamic_cast<
                    action_space::replace_action1<symbol_t>*>(entry)) {
                    auto &prod = *d0;
                    if (state->symbol != prod.reduce) {
                        // The entry belongs to another non-terminal: drop this
                        // one if the lookahead may follow it, else skip input.
                        if (follow[state->symbol]->count(next_symbol)) {
                            stack.pop();
                        } else {
                            read(is, next_symbol);
                        }
                        error_count++;
                    } else {
                        // Expand: attach the right-hand side as children, then
                        // push them reversed so the leftmost ends up on top.
                        stack.pop();
                        for (auto &sym : prod.produce) {
                            state->insert(result->alloc(sym));
                        }
                        for (auto iter = state->ch.rbegin(); iter != state->ch.rend();
                        iter++) {
                            stack.push(*iter);
                        }
                    }
                } else if (dynamic_cast<action_space::error_action*>(entry)) {
                    // Explicit error entry: skip the offending input symbol.
                    // Without this neither the stack nor the input changes and
                    // the loop never terminates.
                    read(is, next_symbol);
                    error_count++;
                } else if (dynamic_cast<action_space::synch_action*>(entry)) {
                    // Synchronising entry: give up on this non-terminal.
                    stack.pop();
                    error_count++;
                } else {
                    // Unrecognised action type: treat it as an error entry so
                    // the loop still makes progress.
                    read(is, next_symbol);
                    error_count++;
                }
            } else {
                // No entry for this lookahead: skip the input symbol.
                read(is, next_symbol);
                error_count++;
            }
        } else {
            if (next_symbol != state->symbol) {
                error_count++;
            } else {
                stack.pop();
            }
            read(is, next_symbol);
        }
    }
    // Surface counted errors through the result, as the LR driver does.
    if (error_count != 0) {
        result->code = ResultCode::Error;
    }
    return result;
}

測試結果

( num + num / ( num + num * num ) )
{ code: Ok, node: {{ut,S}, {
    {{ut,E}, {
        {{t,Lbr}, {}}, {{ut,E}, {
            {{t,Num}, {}}, {{ut,ED}, {
                {{t,Add}, {}}, {{ut,T}, {
                {{t,Num}, {}}, {{ut,TD}, {
                {{t,Div}, {}}, {{ut,F}, {
                {{t,Lbr}, {}}, {{ut,E}, {
                    {{t,Num}, {}}, {{ut,ED}, {
                    {{t,Add}, {}}, {{ut,T}, {
                    {{t,Num}, {}}, {{ut,TD}, {
                    {{t,Mul}, {}}, {{ut,F}, {
                    {{t,Num}, {}}, }}, {{ut,TD}, {}}, }}, }}, {{ut,ED}, {}}, }}, }},
                {{t,Rbr}, {}}, }}, {{ut,TD}, {}}, }}, }}, {{ut,ED}, {}}, }}, }},
        {{t,Rbr}, {}}, {{ut,ED}, {}}, }}, }}}

可見結果相比遞歸調用分析的要簡化很多,減少了很多不必要的推導過程。

LR(1)分析法

c++模型

LR(1)語法的結構體如下:

    /// LR(1) grammar: grammar data, First sets and the ACTION/GOTO table,
    /// combined with a parsing policy (the driver) through inheritance.
    ///
    /// @tparam grammar_traits  supplies the model/string/symbol/production/state types
    /// @tparam Policy          parser driver base class; defaults to BasicLRGrammar
    template<class grammar_traits, class Policy=BasicLRGrammar<grammar_traits> >
class LR1Grammar : public Policy {
public:
	using model_t = typename grammar_traits::model_t;
	using string = typename grammar_traits::string;
	using strvec = typename grammar_traits::strvec;
	using symbol_t = typename grammar_traits::symbol_t;
	using production_t = typename grammar_traits::production_t;
	using state_id_t = typename grammar_traits::state_id_t;

	// Must name this exact instantiation; omitting Policy would name the
	// default-policy type, which differs from *this for a custom policy.
	using grammar_t = LR1Grammar<grammar_traits, Policy>;
private:
	// Grammar data; the symbol table and productions are held by reference.
	std::map<string, symbol_t> &sym_table;
	std::vector<production_t> &prods;
	symbol_t begin_symbol;

	// First set of every symbol.
	std::map<symbol_t, std::set<symbol_t>* > first;

	// table[state][symbol] is the shift / goto / reduce / accept action.
	using action_map = std::map<symbol_t, action_space::action*>;
	std::map<state_id_t, action_map*> table;
};  // the terminating ';' is required after a class definition

根據First集構造LR(1)分析表

計算LR(1)項目集

僞算法如下:

do
    if Items is empty then
        Items = {<S' -> · S, Dollar> as <Production, Lookahead>}
    end
    for I in Items do
        let J be move(I, x)
        do
            for {A -> B · C X, L} in J do
                for b in First [X L] do
                    for production in Productions do
                        let production be U -> V where U = C
                        J = J merge {<U -> · V, b>}
                    end
                end
            end 
        while J changed
        if J in Items then
            Let K be Item in Items where K = J
            let go(I, x) = K
        end
        else
            Items = Items merge J
            let go(I, x) = J
        end
    end
while Items extended

計算函數的入口函數如下:

template<typename grammar_traits>
void calculate_LR_1_items(
    std::vector<typename grammar_traits::production_t> &prods,
    std::map<typename grammar_traits::symbol_t,
    std::set<typename grammar_traits::symbol_t>* > &first,
    typename grammar_traits::symbol_t &begin_symbol,
    const std::function<void(LR1ActionCalculationContext<grammar_traits>&)> &callback) {
    LR1ActionCalculationContext<grammar_traits>(prods, first, begin_symbol).build().
        callback(callback);
}

具體的計算函數太過複雜就不貼出來了,但我們也把它們的函數簽名貼出來並做分析:

// Working state for computing the LR(1) item sets and the transition graph
// between them. Method bodies are elided in this listing; the notes on them
// follow the accompanying article text.
template<class grammar_traits>
struct LR1ActionCalculationContext {
    using context_t = LR1ActionCalculationContext<grammar_traits>;
    using symbol_t = typename grammar_traits::symbol_t;
    using state_id_t = typename grammar_traits::state_id_t;
    using production_t = typename grammar_traits::production_t;
    using action_map = std::map<symbol_t, action_space::action*>;
    // An LR(1) item: ((production index, dot position), lookahead symbol).
    using item_t = std::pair<std::pair<int, int>, symbol_t>;
    using hashed_item_t = int64_t;
    // One LR(1) state is a set of items.
    using state_set = std::set<item_t>;

    std::vector<item_t> items;
    // Item set of each state, indexed by state id.
    std::vector<state_set*> state;
    // Maps an item-set hash to a state id — presumably used to detect an item
    // set that was already generated; confirm against the elided bodies.
    std::map<hashed_item_t, state_id_t> hash_set;
    // Transition graph between states; the table builder reads edges through
    // at_e(state) and uses each edge's symbol `w` and target state `to`.
    graph::WeightedGraph<state_id_t, symbol_t, 500, 500 * 10> automa;
    // NOTE(review): presumably parameters of calculate_hash — confirm.
    int64_t seed, seed2;
    const int64_t mod = 1000000000 + 9;

    // Grammar data being processed. `prods` and `begin_symbol` are references;
    // `first` is stored by value (a copy of the map of set pointers).
    std::vector<production_t> &prods;
    std::map<symbol_t, std::set<symbol_t>* > first;
    symbol_t &begin_symbol;

    // Per the article: creates state 0 from the initial item, then calls
    // extend and walk to generate the remaining states.
    context_t &build() {...}

    // Takes a function receiving this context; body elided.
    context_t &callback(const std::function<void(context_t &)> &cb) {...}

private:

    // Per the article: moves the dot over each symbol of a not-yet-visited
    // item set, calls extend on each new item set and then recurses.
    void walk(state_id_t idx) {...}

    // Per the article: extends the item set with items for every derivable
    // non-terminal; if an identical item set already exists, the new one is
    // discarded and the existing state id is returned.
    state_id_t extend(state_id_t idx) {...}

    // Per the article: called by extend with the computed lookahead set.
    void extend_to(state_set &mset, std::set<symbol_t> lookahead,
        const symbol_t &next_sym) {...}

    // Returns a hashed_item_t computed from an item set; body elided.
    hashed_item_t calculate_hash(state_set &mset) {...}
};

`build`函數內先將項目 $\langle S' \to \cdot S,\ \$ \rangle$ 初始化爲第0個狀態,然後調用`extend`函數擴展該狀態,再調用`walk`函數拓展該狀態的後繼狀態(調用關係:`build`調`extend`和`walk`,`walk`調`extend`,`extend`調`extend_to`)。`walk`函數對於每個未發現的項目集移動遊標,對每個新的項目集調用`extend`函數,再調用自身。`extend`函數對於每個可推導的非終結符,都用`extend_to`和計算出的 $\mathrm{First}(\beta a)$ 展望符集合進行拓展;如果之前出現過該項目集,則刪除新拓展的項目集並返回舊項目集編號,否則保留新項目集。

構造LR(1)分析表

僞算法如下:

for I in LR(1) Items do
    let <A -> B · a C, b> be Item in I where a is term
    and let J = go(I, a) 
    then Action[I, a] = Shift a and Goto J
    
    let <A -> B · U C, b> be Item in I where U is uterm
    and let J = go(I, U) 
    then Action[I, U] = Goto J
    
    let <A -> B ·, b> be Item in I where A is not begin symbol
    then Action[I, b] = Reduce B to A
    
    let <S -> A ·, Dollar> be Item in I where S is begin symbol
    then Action[I, Dollar] = Accept
end

c++代碼如下:

// Fill the LR(1) table from the computed item sets: one action row per state.
calculate_LR_1_items<grammar_traits>(prods, first, begin_symbol, [&](
    context_t &context) {

    for (int i = 0; i < context.state.size(); i++) {
        auto acmp = new action_map();
        table[i] = acmp;
        auto &row = *acmp;

        // Outgoing edges: a non-terminal label becomes a goto entry, a
        // terminal label becomes a shift entry.
        for (auto edge : context.automa.at_e(i)) {
            auto &sym = edge.w;
#ifdef DEBUG
            if (row.count(sym)) {
                std::cout << "conflict item"; print::print(sym, true);
            }
#endif
            if (sym.is_unterm()) {
                row[sym] = new action_space::goto_action<state_id_t>(edge.to);
            } else {
                row[sym] = new action_space::shift_action<state_id_t>(edge.to);
            }
        }

        // Completed items (dot at the end of the production): accept for a
        // production of the begin symbol, otherwise reduce on the lookahead.
        for (auto &item : *context.state[i]) {
            auto &prod = prods[item.first.first];
            if (item.first.second != prod.rhs.size()) {
                continue;
            }
#ifdef DEBUG
            if (row.count(item.second)) {
                std::cout << "conflict item"; print::print(item.second, true);
            }
#endif
            if (prod.lhs == begin_symbol) {
                row[item.second] = new action_space::accept_action();
            } else {
                row[item.second] =
                    new action_space::replace_action1<symbol_t>(prod.lhs, prod.rhs);
            }
        }
    }
});

設計通用的LR語法分析器

注意BasicLRGrammar是默認的LR分析器,其對應的語法推導過程僞算法如下:

let stack = [<begin symbol, I0, new node> as <Symbol, Item,AstRoot>]
for symbol flow from LexerResult do
    let S, I, R be extracted from Top[stack]
    let action = Action[I, symbol]
    if action is in form of Shift a and Goto J
        Push(stack, <a, J, new node>)
    end
    else if action is in form of Reduce B to A
        let chs = []
        for Top[stack] matching B do
            let T, J, ch be extracted from Top[stack]
            chs = [ch, chs...]
        end
        let T, J, Rt be extracted from Top[stack] where T must equal to A
        Push(stack, <A, goto(J, A), new node with children chs>)
    end
    else if action is in form of Accept
        return Bottom[S].AstRoot
    end
    else
        error()
    end
end

對應的c++代碼如下:

/// Table-driven LR parse of the symbols read from `is`.
///
/// The stack holds (tree node, state) pairs. Each step looks up the action
/// for the current state and lookahead: shift pushes the lookahead, reduce
/// replaces the right-hand side on the stack with a new parent node and
/// follows the goto entry, accept attaches the final node to the root.
///
/// @param is  symbol source, consumed through read(is, next_symbol)
/// @return    newly allocated result (the caller owns it); `code` is set to
///            ResultCode::Error when at least one error was counted
template<class IStream>
result_t *work(IStream &is) {
    reset();
    auto result = new result_t();
    auto rt = result->alloc(begin_symbol);
    result->rt = rt;
    stack.push(std::make_pair(rt, 0));
    read(is, next_symbol);
    while (stack.size()) {
        auto acmp = (*table)[stack.top().second];
        if (!acmp->count(next_symbol)) {
            error_count++;
            // At end of input there is nothing left to skip; reading again
            // would never change the lookahead and the loop would not end.
            if (next_symbol == grammar_traits::eof) {
                break;
            }
            read(is, next_symbol);
            continue;
        }
        auto norm_action = (*acmp)[next_symbol];
        if (auto shift = dynamic_cast<action_space::shift_action<state_id_t>*>(
            norm_action)) {
            stack.push({result->alloc(next_symbol), shift->to_state});
            read(is, next_symbol);
        } else if (auto reduce = dynamic_cast<action_space::replace_action1<symbol_t>*>(
            norm_action)) {
            const auto arity = reduce->produce.size();
            // The bottom entry (state 0) must survive the pops; otherwise the
            // stack does not match the table and top() would be undefined.
            if (stack.size() <= arity) {
                error_count++;
                break;
            }
            auto mrt = result->alloc(reduce->reduce);
            for (auto k = arity; k > 0; --k) {
                mrt->ch.push_back(stack.top().first);
                stack.pop();
            }
            // Children were popped right-to-left; restore source order.
            std::reverse(mrt->ch.begin(), mrt->ch.end());
            auto acmp2 = (*table)[stack.top().second];
            // Look before indexing: operator[] would insert a null entry, and
            // a missing or non-goto entry must not be dereferenced.
            auto go = acmp2->count(reduce->reduce)
                ? dynamic_cast<action_space::goto_action<state_id_t>*>(
                    (*acmp2)[reduce->reduce])
                : nullptr;
            if (go == nullptr) {
                error_count++;
                break;
            }
            stack.push({mrt, go->to_state});
        } else if (dynamic_cast<action_space::accept_action*>(norm_action)) {
            result->rt->insert(stack.top().first);
            break;
        } else {
            // Unrecognised action type: stop rather than spin without progress.
            error_count++;
            break;
        }
    }
    if (error_count != 0) {
        result->code = ResultCode::Error;
    }
    return result;
}

測試結果

測試結果如下:

( num + num / ( num + num * num ) )
{ code: Ok, node: {{ut,S}, {{{ut,E}, {{{ut,T}, {{
    {ut,F}, {
        {{t,Lbr}, {}}, {{ut,E}, {{{ut,E}, {{{ut,T}, {{{ut,F}, {
            {{t,Num}, {}}, }}, }}, }},
            {{t,Add}, {}}, {{ut,T}, {{{ut,T}, {{{ut,F}, {
                {{t,Num}, {}}, }}, }},
                {{t,Div}, {}}, {{ut,F}, {
                    {{t,Lbr}, {}}, {{ut,E}, {{{ut,E}, {{{ut,T}, {{{ut,F}, {
                        {{t,Num}, {}}, }}, }}, }},
                        {{t,Add}, {}}, {{ut,T}, {{{ut,T}, {{{ut,F}, {
                            {{t,Num}, {}}, }}, }},
                            {{t,Mul}, {}}, {{ut,F}, {
                            {{t,Num}, {}}, }}, }}, }},
                    {{t,Rbr}, {}}, }}, }}, }},
        {{t,Rbr}, {}}, }}, }}, }}, }}}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章