/* ***************** */
/* Token Definitions */
/* ***************** */
// Private regular-expression fragments (names prefixed with '#'). They are declared for
// every lexical state (<*>) and are only used as building blocks inside the token
// definitions of this file.
<*> TOKEN : {
<#_NUM_CHAR: ["0"-"9"] > // Best kept private rather than global; a global definition would influence many other rules.
| <#_ESCAPED_CHAR: "\\" ~[] > // Any character preceded by a backslash, e.g. \\s, \\t. ~[] stands for "any character".
// A term may start with any character that is not whitespace or one of the listed
// operator characters, or with an escaped character.
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
"[", "]","<", ">","\"","#","%","'", "{", "}","~", "*", "?","\\" ,"=",","]
| <_ESCAPED_CHAR> ) >
// After the first character a term may additionally contain "-" and "+".
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
// Whitespace includes the ideographic space U+3000.
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
// Inside double quotes: anything except an unescaped quote or backslash.
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
}
// Whitespace is discarded in the DEFAULT, RangeIn and RangeEx lexical states.
// The Boost state is not listed here.
<DEFAULT, RangeIn, RangeEx> SKIP : {
< <_WHITESPACE>>
}
// Tokens recognised in the DEFAULT lexical state. The order of the definitions is
// significant: several of them can match the same text (for example SHIT and a
// FUZZY_SLOP without digits, or STAR and the "*" form of PREFIXTERM).
// NOTE(review): JavaCC is expected to prefer the longest match and, on a tie, the
// earlier definition - confirm before reordering anything here.
<DEFAULT> TOKEN : {
// Case-insensitive keywords. OR is also written as ",".
<AND: ([ "a", "A" ] [ "n", "N" ] [ "D", "d" ]) >
| <OR: ([ "o", "O" ] [ "r", "R" ] | ",") >
| <NOT: ([ "N", "n" ] [ "o", "O" ] [ "t", "T" ]) >
| <NEAR: (["N","n"]["e","E"]["A","a"]["r","R"])> // Note: do not use a catch-all definition such as ([A-Z]|[a-z])* for a global token; that would also be too limiting.
// Proximity operators: "wf" or "w" followed by zero or more digits, and "space".
| <WORDFORWARD: (["W","w"]["f","F"](< _NUM_CHAR>)*)>
| <WORDNEAR: (["W","w"](< _NUM_CHAR>)*) >
| <SPACE:(["S","s"]["p","P"]["A","a"]["C","c"]["e","E"]) >
| <GREATER: ">" >
| <LESS: "<" >
| <SMALLDOT: "'" >
| <PLUS: "+" >
| <MINUS: "-" >
| <LPAREN: ("(") >
| <RPAREN: (")") >
// The field separator in this grammar is "=", not ":".
| <COLON: "=" >
// Single-character marker: one of "~", "#", "%".
| <SHIT: ["~","#","%"] >
| <STAR: "*" >
// "^" switches the lexer to the Boost state, where only a NUMBER is accepted.
| <CARAT: "^" > : Boost
| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
// A SHIT marker optionally followed by an integer or decimal number.
| <FUZZY_SLOP: < SHIT> ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
// "[" and "{" open an inclusive / exclusive range and switch lexical state.
| <RANGEIN_START: ("[") > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}
// Boost state (entered after "^"): an integer or decimal number, after which the
// lexer returns to the DEFAULT state.
<Boost> TOKEN : {
<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}
// Tokens valid inside an inclusive range "[ ... ]".
<RangeIn> TOKEN : {
// Range separator: the word "to" (any case), or the text of MINUS or SHIT.
<RANGEIN_TO: ((["t","T"]["O","o"])|< MINUS>|< SHIT>)>
// "]" closes the range and returns to the DEFAULT state.
| <RANGEIN_END: ("]")> : DEFAULT
| <RANGEIN_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
// Unquoted bound: one or more characters other than "-", "~", space and "]".
| <RANGEIN_GOOP: (~["-","~"," ", "]" ])+ >
}
// Tokens valid inside an exclusive range "{ ... }".
<RangeEx> TOKEN : {
<RANGEEX_TO: < RANGEIN_TO>> // Some definitions can be reused, such as <RANGEIN_TO> here.
// "}" closes the range and returns to the DEFAULT state.
| <RANGEEX_END: "}"> : DEFAULT
| <RANGEEX_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
// Unquoted bound: one or more characters other than "-", "~", space and "}".
| <RANGEEX_GOOP: (~["-","~"," ", "}" ])+ >
}
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
/**
 * Parses an optional AND / OR connective.
 *
 * Returns CONJ_AND or CONJ_OR when the corresponding token is present,
 * and CONJ_NONE when neither follows.
 */
int Conjunction() : {
  int conjunction = CONJ_NONE;
}
{
  (
    <AND> { conjunction = CONJ_AND; }
  | <OR>  { conjunction = CONJ_OR; }
  )?
  { return conjunction; }
}
/**
 * Parses an optional clause modifier.
 *
 * Returns MOD_REQ for "+", MOD_NOT for "-" or the NOT keyword,
 * and MOD_NONE when no modifier is present.
 */
int Modifiers() : {
  int modifier = MOD_NONE;
}
{
  (
    <PLUS> { modifier = MOD_REQ; }
  | ( <MINUS> | <NOT> ) { modifier = MOD_NOT; }
  )?
  { return modifier; }
}
/**
 * Entry point for a complete query string: parses one Query and then
 * requires end of input, so trailing garbage is rejected.
 */
Query TopLevelQuery(String field) :
{
  Query parsed;
}
{
  parsed=Query(field) <EOF>
  { return parsed; }
}
/**
 * Parses a sequence of clauses: the first clause with an optional modifier,
 * followed by any number of [conjunction] [modifier] clause groups.
 *
 * When the whole sequence is a single clause that carried no modifier, that
 * clause's query is returned directly; otherwise the collected clauses are
 * combined through getBooleanQuery.
 */
Query Query(String field) :
{
  List<BooleanClause> clauses = new ArrayList<BooleanClause>();
  Query clause;
  Query soleCandidate = null;
  int conj;
  int mods;
}
{
  mods=Modifiers() clause=Clause(field)
  {
    addClause(clauses, CONJ_NONE, mods, clause);
    // Only an unmodified first clause may be returned on its own.
    if (mods == MOD_NONE) {
      soleCandidate = clause;
    }
  }
  (
    conj=Conjunction() mods=Modifiers() clause=Clause(field)
    { addClause(clauses, conj, mods, clause); }
  )*
  {
    boolean singleUnmodified = clauses.size() == 1 && soleCandidate != null;
    return singleUnmodified ? soleCandidate : getBooleanQuery(clauses);
  }
}
/**
 * Parses one clause: an optional field prefix (TERM or "*" followed by the
 * COLON token), then either a distance query, a plain term, or a
 * parenthesised sub-query with an optional "^NUMBER" boost.
 *
 * The field prefix, when present, replaces the incoming field for this clause.
 * For a parenthesised sub-query the shared brace counters (lbrace, appendlbrace)
 * are updated; if end of input is reached instead of ")", the sub-query is
 * returned immediately provided lbrace equals rbrace, otherwise a
 * ParseException is thrown.
 *
 * `boost` is parser state shared with the term productions. It is cleared here
 * once it has been consumed; previously the boost of a parenthesised group
 * stayed set and was applied again to the next term parsed by Term().
 */
Query Clause(String field) : {
  Query q;
  Token fieldToken=null;
}
{
  [
    LOOKAHEAD(2)
    (
      fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}
    | <STAR> <COLON> {field="*";}
    )
  ]
  (
    (
      LOOKAHEAD(3)
      (
        LOOKAHEAD(3)
        q = distanceQueryStart(field)
      | q = Term(field)
      )
    |(<LPAREN>)
      {
        lbrace+=1;
        appendlbrace+=1;
      }q=Query(field) (<RPAREN>|<EOF >
      {if(lbrace!=rbrace){throw new ParseException("brace does not matches");}else{return q;}})
      {appendlbrace=0;}(<CARAT> boost=<NUMBER>)?
    )
  )
  {
    if (boost != null) {
      // Take ownership of the pending boost so it cannot leak into a later clause.
      Token pendingBoost = boost;
      boost = null;
      // A null query has nothing to boost.
      if (q != null) {
        try {
          float f = Float.valueOf(pendingBoost.image).floatValue();
          q.setBoost(f);
        } catch (Exception ignored) {
          // Best effort: an unusable boost value leaves the query unboosted.
        }
      }
    }
    return q;
  }
}
/**
 * Parses a bracketed range: "[lower TO upper]" (inclusive) or
 * "{lower TO upper}" (exclusive), where the separator token is optional and
 * each bound is either an unquoted GOOP token or a quoted string.
 *
 * Quoted bounds have their surrounding quotes removed; both bounds are passed
 * through discardEscapeChar before getRangeQuery is called. An optional
 * "^NUMBER" after the closing bracket is stored in the shared `boost` state.
 */
Query rangeTerm(String field):
{
  Token lower, upper;
  boolean inclusive;
  Query q;
}
{
  (
    ( <RANGEIN_START> ( lower=<RANGEIN_GOOP>|lower=<RANGEIN_QUOTED> )
      [ <RANGEIN_TO> ] ( upper=<RANGEIN_GOOP>|upper=<RANGEIN_QUOTED> )
      <RANGEIN_END> )
    [ <CARAT> boost=<NUMBER> ]
    { inclusive = true; }
  | ( <RANGEEX_START> ( lower=<RANGEEX_GOOP>|lower=<RANGEEX_QUOTED> )
      [ <RANGEEX_TO> ] ( upper=<RANGEEX_GOOP>|upper=<RANGEEX_QUOTED> )
      <RANGEEX_END> )
    [ <CARAT> boost=<NUMBER> ]
    { inclusive = false; }
  )
  {
    // Each branch can only yield its own QUOTED kind, so one combined check suffices.
    if (lower.kind == RANGEIN_QUOTED || lower.kind == RANGEEX_QUOTED) {
      lower.image = lower.image.substring(1, lower.image.length()-1);
    }
    if (upper.kind == RANGEIN_QUOTED || upper.kind == RANGEEX_QUOTED) {
      upper.image = upper.image.substring(1, upper.image.length()-1);
    }
    q = getRangeQuery(field, discardEscapeChar(lower.image), discardEscapeChar(upper.image), inclusive);
    return q;
  }
}
/**
 * Consumes the remainder of a distance (proximity) expression one token at a
 * time, appending each token's text to the shared buffer `sb`, and finally
 * stores the formatted result in the parser member `distanceQuery`.
 *
 * Each of the first three alternatives consumes one token, updates the shared
 * counters and then calls this production again directly from its Java action.
 * The last alternative (an optional "^NUMBER") ends the recursion and builds
 * the result.
 *
 * field     - field name, prefixed to the text handed to the formatter.
 * beforeStr - text already consumed by the caller; placed before the buffered text.
 *
 * NOTE(review): sb, lbrace, rbrace, diskycout, disnwcout and the member
 * `distanceQuery` are not declared in this production - presumably parser
 * fields; confirm against the parser class section.
 */
void distanceQuery(String field,String beforeStr):
{
Token term, boost=null;
Token slop=null;
}
{
(
(
slop=< NEAR>
|slop=< WORDFORWARD>
|slop=< WORDNEAR>
|slop=< SPACE>
)
{
// Proximity operator: count it and append it surrounded by spaces.
disnwcout+=1;
sb.append(" "+slop.image.trim()+" ");
distanceQuery(field,beforeStr);
return;
}
|term = < TERM>
{
// Keyword: count it and append its text.
diskycout+=1;
sb.append(term.image.trim());
distanceQuery(field,beforeStr);
return;
}|(
<LPAREN>{sb.append(" ( ");lbrace+=1;}
|< RPAREN>{rbrace+=1;sb.append(" ) ");}
)
{distanceQuery(field,beforeStr);return;}
// `boost` is a local of this production: a trailing ^NUMBER is consumed here,
// but its value is not read anywhere below.
|[<CARAT> boost=<NUMBER >]
{
// When both counters are non-zero, keywords must outnumber operators by exactly one.
if(diskycout!=0 && disnwcout!=0 && diskycout-disnwcout!=1)
{
throw new ParseException("the keyword does not matches");
}
DistanceQueryFormat format = new DistanceQueryFormat();
DistanceQueryFormat.parenMatches(lbrace,rbrace);
distanceQuery = format.formatPhraseTerm(field+":"+beforeStr+" "+sb.toString().trim());
//System.out.println(field+":"+beforeStr+" "+sb.toString().trim());
// Reset the shared state for the next distance expression.
lbrace=0;
rbrace=0;
diskycout=0;
disnwcout=0;
sb = new StringBuffer();
}
)
}
/**
 * Recognises the start of a distance (proximity) expression - a TERM followed
 * by one of NEAR / WORDFORWARD / WORDNEAR / SPACE, optionally preceded by "(" -
 * then hands the rest of the expression to distanceQuery().
 *
 * The text passed on as the "before" string consists of one " ( " per pending
 * appendlbrace count, the optional "(", the first term and the operator token.
 * Returns the parser member `distanceQuery` after distanceQuery() has run.
 */
Query distanceQueryStart(String field):
{
  Token operator = null;
  Token firstTerm = null;
  // Placeholder with an empty image so the no-parenthesis case needs no special handling.
  Token openParen = new Token();
  openParen.image = "";
}
{
  (
    firstTerm=< TERM >
    (
      operator=< NEAR>
    | operator=< WORDFORWARD>
    | operator=< WORDNEAR>
    | operator=< SPACE>
    )
  | openParen=< LPAREN> firstTerm=< TERM>
    (
      operator=< NEAR>
    | operator=< WORDFORWARD>
    | operator=< WORDNEAR>
    | operator=< SPACE>
    )
    { lbrace += 1; }
  )
  {
    // One keyword and one operator have been consumed so far.
    diskycout += 1;
    disnwcout += 1;

    // Re-emit the parentheses that Clause() consumed before reaching this production.
    StringBuilder pendingParens = new StringBuilder();
    for (int i = 0; i < appendlbrace; i++) {
      pendingParens.append(" ( ");
    }
    appendlbrace = 0;

    distanceQuery(field,
        pendingParens + openParen.image.trim() + " " + firstTerm.image.trim() + " " + operator);
    return distanceQuery;
  }
}
/**
 * Parses a simple term (TERM, "*", prefix term, wildcard term or NUMBER) with
 * an optional fuzzy marker and an optional "^NUMBER" boost, and builds the
 * matching query:
 *   - wildcard / "*"  -> getWildcardQuery
 *   - prefix term     -> getPrefixQuery (trailing "*" removed)
 *   - fuzzy           -> getFuzzyQuery with the given or default similarity
 *   - otherwise       -> getFieldQuery
 *
 * The fuzzy marker is either a FUZZY_SLOP token or a bare SHIT token. In the
 * latter case no similarity value exists and fuzzyMinSim is used; this is now
 * checked explicitly instead of relying on a swallowed NullPointerException.
 *
 * Throws ParseException when the similarity is outside [0.0, 1.0].
 * The boost token is stored in the shared `boost` state for the caller.
 */
Query smpTerm(String field):
{
  Token term, fuzzySlop=null;
  boolean prefix = false;
  boolean wildcard = false;
  boolean fuzzy = false;
  Query q;
}
{
  (
    (
      term=<TERM>
    | term=<STAR> { wildcard=true; }
    | term=<PREFIXTERM> { prefix=true; }
    | term=<WILDTERM> { wildcard=true; }
    | term=<NUMBER>
    )
    [ (fuzzySlop=<FUZZY_SLOP>| < SHIT>) { fuzzy=true; } ]
    [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
    {
      String termImage=discardEscapeChar(term.image);
      if (wildcard) {
        q = getWildcardQuery(field, termImage);
      } else if (prefix) {
        // Strip the trailing "*" before unescaping.
        q = getPrefixQuery(field,
            discardEscapeChar(term.image.substring
            (0, term.image.length()-1)));
      } else if (fuzzy) {
        float fms = fuzzyMinSim;
        // fuzzySlop is null when the marker was a bare SHIT token.
        if (fuzzySlop != null) {
          try {
            // Skip the leading marker character; the rest is the similarity.
            fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
          } catch (NumberFormatException ignored) {
            // Marker without digits: keep the default similarity.
          }
        }
        if(fms < 0.0f || fms > 1.0f){
          throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
        }
        q = getFuzzyQuery(field, termImage,fms);
      }else {
        q = hasNewAPI ? getFieldQuery(field, termImage, false) : getFieldQuery(field, termImage);
      }
    }
  )
  {
    return q;
  }
}
/**
 * Parses a double-quoted phrase with an optional FUZZY_SLOP (used as the
 * phrase slop) and an optional "^NUMBER" boost.
 *
 * The slop defaults to phraseSlop; when a FUZZY_SLOP token is present its
 * numeric part is parsed as a float and truncated to int, and an unparsable
 * value keeps the default. The surrounding quotes are removed and the content
 * is unescaped before getFieldQuery is called.
 */
Query quotedTerm(String field):
{
  Query q;
  Token phrase, slopToken = null;
}
{
  phrase=<QUOTED>
  [ slopToken=<FUZZY_SLOP> ]
  [ <CARAT> boost=<NUMBER> ]
  {
    int slop = phraseSlop;
    if (slopToken != null) {
      try {
        // Drop the leading marker character; the remainder is the slop value.
        slop = Float.valueOf(slopToken.image.substring(1)).intValue();
      } catch (Exception ignored) { }
    }
    String phraseBody = phrase.image.substring(1, phrase.image.length()-1);
    q = getFieldQuery(field, discardEscapeChar(phraseBody), slop);
    return q;
  }
}
/**
 * Parses a comparison of the form  TERM > TERM  or  TERM < TERM  and turns it
 * into an inclusive range query on the field named by the first term:
 *   name > value  ->  [value, Long.MAX_VALUE]
 *   name < value  ->  [0, value]
 *
 * The `field` argument is not used; the first TERM supplies the field name.
 *
 * An optional "^NUMBER" is stored in the parser's shared `boost` state, like
 * the other term productions do, so that Term() applies it. Previously a local
 * variable named `boost` shadowed that state and the boost was silently dropped.
 */
Query smpRange(String field):
{
  Token term1, term2;
  Query q;
  Token mark;
}
{
  term1=< TERM >
  (mark=< GREATER>|mark=< LESS>)
  term2=< TERM >
  [ <CARAT> boost=<NUMBER> ]
  {
    // Compare the token kind rather than running a regex on the image.
    if (mark.kind == GREATER)
    {
      q = getRangeQuery(term1.image.trim(),term2.image.trim(),String.valueOf(Long.MAX_VALUE),true);
    }else
    {
      q = getRangeQuery(term1.image.trim(),String.valueOf(0),term2.image.trim(),true);
    }
    return q;
  }
}
/**
 * Parses one term of any supported kind - comparison range, simple term,
 * bracketed range or quoted phrase - and applies the pending boost, if any.
 *
 * The sub-productions leave a "^NUMBER" token in the shared `boost` state.
 * Here it is converted to a float (falling back to 1.0 when it cannot be
 * parsed), applied to the query unless the query is null, and then cleared.
 */
Query Term(String field) : {
  Query q;
}
{
  (
    (
      LOOKAHEAD(2)
      q = smpRange(field)
    | q = smpTerm(field)
    )
  | q = rangeTerm(field)
  | q = quotedTerm(field)
  )
  {
    if (boost != null) {
      float factor;
      try {
        factor = Float.parseFloat(boost.image);
      } catch (Exception ignored) {
        // An invalid boost number means "no boost".
        factor = 1.0f;
      }
      // Null queries (e.g. produced by stop words) are not boosted.
      if (q != null) {
        q.setBoost(factor);
      }
    }
    boost = null;
    return q;
  }
}
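// A self-modified version of Lucene's JavaCC (.jjt) query-parser grammar.
// (Residue of the web page this file was copied from: "Post a comment" / "All comments" /
// "No one has commented yet. Want to be the first? Enter your comment in the box above and click Publish.")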