自己修改的lucene的jjt文件

/* ***************** */
/* Token Definitions */
/* ***************** */

// Private (#) character-class helpers. They are declared for every lexical
// state (<*>) and can only be referenced from other token definitions; they
// never produce tokens themselves.
<*> TOKEN : {
  // A single decimal digit.
  <#_NUM_CHAR:   ["0"-"9"] > // Best not to define this as a global (non-private) token, otherwise it would interfere with many other things.
  // A backslash followed by any single character.
| <#_ESCAPED_CHAR: "\\" ~[] > // Any character after escaping, e.g. \\s, \\t. ~[] means "any character at all".
  // First character of a term: anything except whitespace and the listed
  // operator/punctuation characters, or an escaped character.
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
                           "[", "]","<", ">","\"","#","%","'", "{", "}","~", "*", "?","\\" ,"=",","]
                       | <_ESCAPED_CHAR> ) >
  // Subsequent characters of a term: additionally allows "-" and "+".
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
  // Whitespace, including the ideographic space U+3000.
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
  // A character inside a quoted phrase: anything but an unescaped quote or backslash.
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
}

// Whitespace is discarded in the DEFAULT, RangeIn and RangeEx lexical states.
// The Boost state is not listed here, so whitespace is not skipped there.
<DEFAULT, RangeIn, RangeEx> SKIP : {
  < <_WHITESPACE>>
}

// Tokens recognised in the DEFAULT lexical state.
// JavaCC picks the longest match; when two definitions match the same length,
// the one listed first wins. The order below is therefore significant:
//   - the keyword tokens (AND, OR, NOT, NEAR, WORDFORWARD, WORDNEAR, SPACE)
//     precede TERM, so an input of exactly that spelling is a keyword;
//   - SHIT precedes FUZZY_SLOP, so a lone "~", "#" or "%" is a SHIT token;
//   - STAR precedes PREFIXTERM and WILDTERM, so a lone "*" is a STAR token.
<DEFAULT> TOKEN : {
  // Case-insensitive "and".
  <AND:       ([ "a", "A" ] [ "n", "N" ] [ "D", "d" ]) >
  // Case-insensitive "or", or a comma.
| <OR:        ([ "o", "O" ] [ "r", "R" ] | ",") >
  // Case-insensitive "not".
| <NOT:       ([ "N", "n" ] [ "o", "O" ] [ "t", "T" ]) >
  // Case-insensitive "near".
| <NEAR:     (["N","n"]["e","E"]["A","a"]["r","R"])>  // Note: when defining tokens globally, avoid patterns like ([A-Z]|[a-z])*, because they would also restrict what other tokens can match.
  // "wf" followed by zero or more digits (case-insensitive letters).
| <WORDFORWARD: (["W","w"]["f","F"](< _NUM_CHAR>)*)>
  // "w" followed by zero or more digits; a bare "w" also matches.
| <WORDNEAR:  (["W","w"](< _NUM_CHAR>)*) >
  // Case-insensitive "space".
| <SPACE:(["S","s"]["p","P"]["A","a"]["C","c"]["e","E"]) >
| <GREATER: ">" >
| <LESS: "<" >
| <SMALLDOT: "'" >
| <PLUS:      "+" >
| <MINUS:     "-" >
| <LPAREN:    ("(") >
| <RPAREN:    (")") >
  // Despite its name, this token matches "=" (used as the field separator in Clause).
| <COLON:     "=" >
  // One of "~", "#" or "%".
| <SHIT:      ["~","#","%"] >
| <STAR:      "*" >
  // "^" switches the lexer to the Boost state, where NUMBER is recognised.
| <CARAT:     "^" > : Boost
  // A double-quoted phrase, including both quote characters.
| <QUOTED:     "\"" (<_QUOTED_CHAR>)* "\"">
| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
  // A SHIT character optionally followed by an integer or decimal number.
| <FUZZY_SLOP:     < SHIT> ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
  // A term ending in "*" (or "*" alone, which STAR matches first).
| <PREFIXTERM:  ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
  // A term containing "*" and/or "?" wildcards anywhere.
| <WILDTERM:  (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
  // "[" switches to the RangeIn state; "{" switches to the RangeEx state.
| <RANGEIN_START: ("[") > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}

// Boost state (entered after "^"): an integer or decimal number, after which
// the lexer returns to the DEFAULT state.
<Boost> TOKEN : {
<NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}

// RangeIn state (entered after "["): tokens of a "[ lower TO upper ]" range.
<RangeIn> TOKEN : {
// Separator between the bounds: case-insensitive "to", "-", or one of "~", "#", "%".
<RANGEIN_TO: ((["t","T"]["O","o"])|< MINUS>|< SHIT>)>
// "]" closes the range and returns the lexer to the DEFAULT state.
| <RANGEIN_END: ("]")> : DEFAULT
// A double-quoted bound; \" is accepted inside the quotes.
| <RANGEIN_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
// An unquoted bound: one or more characters other than "-", "~", space and "]".
| <RANGEIN_GOOP: (~["-","~"," ", "]" ])+ >
}

// RangeEx state (entered after "{"): tokens of a "{ lower TO upper }" range.
<RangeEx> TOKEN : {
// Separator between the bounds; reuses the RANGEIN_TO pattern.
<RANGEEX_TO: < RANGEIN_TO>> // Some definitions can be reused, e.g. <RANGE_TO>.
// "}" closes the range and returns the lexer to the DEFAULT state.
| <RANGEEX_END: "}"> : DEFAULT
// A double-quoted bound; \" is accepted inside the quotes.
| <RANGEEX_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
// An unquoted bound: one or more characters other than "-", "~", space and "}".
| <RANGEEX_GOOP: (~["-","~"," ", "}" ])+ >
}


// *   Query  ::= ( Clause )*
// *   Clause ::= ["+", "-"] [<TERM> "="] ( <TERM> | "(" Query ")" )
// *   (In this grammar the field separator is "=", matched by the COLON token.)

// Parses an optional boolean operator between two clauses.
// Returns CONJ_AND for <AND>, CONJ_OR for <OR>, and CONJ_NONE when neither
// token is present.
int Conjunction() : {
  int conj = CONJ_NONE;
}
{
  (
      <AND> { conj = CONJ_AND; }
    | <OR>  { conj = CONJ_OR; }
  )?
  { return conj; }
}

// Parses an optional clause modifier.
// Returns MOD_REQ for <PLUS>, MOD_NOT for <MINUS> or <NOT>, and MOD_NONE when
// no modifier token is present.
int Modifiers() : {
  int modifier = MOD_NONE;
}
{
  (
      <PLUS>              { modifier = MOD_REQ; }
    | ( <MINUS> | <NOT> ) { modifier = MOD_NOT; }
  )?
  { return modifier; }
}

// Entry production: parses one complete query for the given default field and
// then requires <EOF>, so trailing garbage after the query is rejected.
Query TopLevelQuery(String field) :
{
  Query parsed;
}
{
  parsed = Query(field)
  <EOF>
  { return parsed; }
}

// Parses one or more clauses, each optionally preceded by a conjunction
// and/or a modifier, and collects them through addClause().
// If there is exactly one clause and it carried no modifier, that clause's
// query is returned directly; otherwise the clauses are combined with
// getBooleanQuery().
Query Query(String field) :
{
  List<BooleanClause> clauseList = new ArrayList<BooleanClause>();
  Query current;
  Query soleQuery = null;   // first clause's query, kept only if unmodified
  int conjunction;
  int modifier;
}
{
  modifier=Modifiers() current=Clause(field)
  {
    addClause(clauseList, CONJ_NONE, modifier, current);
    if (modifier == MOD_NONE) {
      soleQuery = current;
    }
  }
  (
    conjunction=Conjunction() modifier=Modifiers() current=Clause(field)
    { addClause(clauseList, conjunction, modifier, current); }
  )*
  {
    if (soleQuery == null || clauseList.size() != 1) {
      return getBooleanQuery(clauseList);
    }
    return soleQuery;
  }
}

// Parses a single clause:
//   [ <TERM> "=" | "*" "=" ]  ( distance expression | term | "(" Query ")" [ "^" NUMBER ] )
//
// field: default field; replaced by the (unescaped) field name when a
//        "<TERM> =" or "* =" prefix is present.
// Returns the query for the clause. A trailing boost on a parenthesised group
// is applied to the group's query.
// Throws ParseException("brace does not matches") when a group is terminated
// by <EOF> while the shared lbrace/rbrace counters disagree.
//
// Uses parser-level state declared outside this production: boost, lbrace,
// rbrace, appendlbrace.
Query Clause(String field) : {
  Query q;
  Token fieldToken=null;
}
{
  [
    LOOKAHEAD(2)
    (
    fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}
    | <STAR> <COLON> {field="*";}
    )
  ]

  (
   (
     LOOKAHEAD(3)
    (
      LOOKAHEAD(3)
       q = distanceQueryStart(field)
     | q = Term(field)
    )
   |(<LPAREN>)
    {
      // Count the opening parenthesis; appendlbrace is read by
      // distanceQueryStart to prepend pending "(" to a distance expression.
      lbrace+=1;
      appendlbrace+=1;
    }q=Query(field) (<RPAREN>|<EOF >
    // A group may also be ended by <EOF>; in that case the clause returns
    // immediately (no boost handling) if the counters agree.
    {if(lbrace!=rbrace){throw new ParseException("brace does not matches");}else{return q;}})
    {appendlbrace=0;}(<CARAT> boost=<NUMBER>)? 
   )
  )
    {
      if (boost != null) {
        // Guard against a null query explicitly instead of relying on a
        // swallowed NullPointerException.
        if (q != null) {
          float f = (float)1.0;
          try {
            f = Float.valueOf(boost.image).floatValue();
          } catch (NumberFormatException ignored) {
            // An unparsable boost leaves the factor at 1.0.
          }
          q.setBoost(f);
        }
        // Clear the shared boost token so it cannot be re-applied to a later
        // term or group (Term applies any non-null boost it finds).
        boost = null;
      }
      return q;
    }
}


// Parses a bracketed range, "[ lower TO upper ]" (inclusive) or
// "{ lower TO upper }" (exclusive), optionally followed by "^" NUMBER.
// The separator between the bounds is optional. Quoted bounds have their
// surrounding quotes removed, and both bounds are unescaped before being
// passed to getRangeQuery(field, lower, upper, inclusive).
// The boost token is stored in the parser-level `boost` variable; it is not
// applied here.
Query rangeTerm(String field):
{
  Token lower = null;
  Token upper = null;
  boolean inclusive = false;
}
{
  (
      <RANGEIN_START>
      ( lower=<RANGEIN_GOOP> | lower=<RANGEIN_QUOTED> )
      [ <RANGEIN_TO> ]
      ( upper=<RANGEIN_GOOP> | upper=<RANGEIN_QUOTED> )
      <RANGEIN_END>
      { inclusive = true; }
    | <RANGEEX_START>
      ( lower=<RANGEEX_GOOP> | lower=<RANGEEX_QUOTED> )
      [ <RANGEEX_TO> ]
      ( upper=<RANGEEX_GOOP> | upper=<RANGEEX_QUOTED> )
      <RANGEEX_END>
  )
  [ <CARAT> boost=<NUMBER> ]
  {
    // Each branch can only produce its own *_QUOTED kind, so checking both
    // kinds here is equivalent to a per-branch check.
    if (lower.kind == RANGEIN_QUOTED || lower.kind == RANGEEX_QUOTED) {
      lower.image = lower.image.substring(1, lower.image.length()-1);
    }
    if (upper.kind == RANGEIN_QUOTED || upper.kind == RANGEEX_QUOTED) {
      upper.image = upper.image.substring(1, upper.image.length()-1);
    }
    return getRangeQuery(field,
                         discardEscapeChar(lower.image),
                         discardEscapeChar(upper.image),
                         inclusive);
  }
}

// Consumes the remainder of a distance (proximity) expression one token at a
// time, recursing after each token, and finally builds the query.
//
// field:     field name prefixed to the expression text.
// beforeStr: text of the expression that was already consumed by the caller.
//
// Each operator (<NEAR>, <WORDFORWARD>, <WORDNEAR>, <SPACE>), <TERM> and
// parenthesis is appended to the shared buffer `sb` while the shared counters
// (disnwcout, diskycout, lbrace, rbrace) are updated. When none of those
// tokens follows, an optional "^" NUMBER is read, the expression is validated
// and formatted through DistanceQueryFormat, and the result is stored in the
// parser-level `distanceQuery` variable.
//
// Throws ParseException("the keyword does not matches") when the term and
// operator counts are inconsistent.
void distanceQuery(String field,String beforeStr):
{
  Token term, boost=null;
  Token slop=null;
}
{
  (
    ( 
     slop=< NEAR>
    |slop=< WORDFORWARD>
    |slop=< WORDNEAR>
    |slop=< SPACE>
    )
    {
      disnwcout+=1;
      sb.append(" "+slop.image.trim()+" ");
      distanceQuery(field,beforeStr);
      return;
    }
    |term = < TERM>
    {
       diskycout+=1;
       sb.append(term.image.trim());
       distanceQuery(field,beforeStr);
       return;
    }|(
      <LPAREN>{sb.append(" ( ");lbrace+=1;}
     |< RPAREN>{rbrace+=1;sb.append(" ) ");}
      )
      {distanceQuery(field,beforeStr);return;}
    |[<CARAT> boost=<NUMBER >]
  {
    try {
      if(diskycout!=0 && disnwcout!=0 && diskycout-disnwcout!=1)
      {
        throw new ParseException("the keyword does not matches");
      }

      DistanceQueryFormat format = new DistanceQueryFormat();
      DistanceQueryFormat.parenMatches(lbrace,rbrace);
      distanceQuery = format.formatPhraseTerm(field+":"+beforeStr+" "+sb.toString().trim());

      // The boost token is local to this production, so it has to be applied
      // here; otherwise a trailing "^N" would be parsed and silently dropped.
      if (boost != null && distanceQuery != null) {
        try {
          distanceQuery.setBoost(Float.valueOf(boost.image).floatValue());
        } catch (NumberFormatException ignored) {
          // An unparsable boost leaves the query's boost unchanged.
        }
      }
    } finally {
      // Always reset the shared state, including when validation or
      // formatting throws, so a later parse does not start from stale
      // counters or leftover buffer text.
      lbrace=0;
      rbrace=0;
      diskycout=0;
      disnwcout=0;
      sb = new StringBuffer();
    }
  } 
  )
  
}


// Recognises the head of a distance expression: an optional "(" followed by
// a <TERM> and one distance operator (<NEAR>, <WORDFORWARD>, <WORDNEAR> or
// <SPACE>). It then hands the rest of the expression to distanceQuery() and
// returns the query that production leaves in the parser-level
// `distanceQuery` variable.
//
// The text passed on is: one " ( " per pending `appendlbrace`, the optional
// "(" matched here, the first term and the operator token.
Query distanceQueryStart(String field):
{
  Token op = null;
  Token firstTerm = null;
  // Placeholder with an empty image so the no-parenthesis branch contributes
  // nothing to the prefix text.
  Token openParen = new Token();
  openParen.image = "";
}
{
  (
      firstTerm=<TERM>
      ( op=<NEAR> | op=<WORDFORWARD> | op=<WORDNEAR> | op=<SPACE> )
    | openParen=<LPAREN> firstTerm=<TERM>
      ( op=<NEAR> | op=<WORDFORWARD> | op=<WORDNEAR> | op=<SPACE> )
      { lbrace += 1; }
  )
  {
    // One term and one operator have been consumed so far.
    diskycout += 1;
    disnwcout += 1;

    StringBuilder head = new StringBuilder();
    for (int pending = appendlbrace; pending > 0; pending--) {
      head.append(" ( ");
    }
    appendlbrace = 0;

    head.append(openParen.image.trim())
        .append(" ")
        .append(firstTerm.image.trim())
        .append(" ")
        .append(op);   // appended via the token's toString(), as in plain concatenation

    distanceQuery(field, head.toString());
    return distanceQuery;
  }
}

// Parses a simple term with optional fuzzy marker and boost:
//   ( TERM | "*" | PREFIXTERM | WILDTERM | NUMBER )
//   [ FUZZY_SLOP | SHIT ]  [ "^" NUMBER [ FUZZY_SLOP | SHIT ] ]
//
// field: field the resulting query targets.
// Returns a wildcard, prefix, fuzzy or plain field query, chosen in that
// order of precedence.
// Throws ParseException when the fuzzy similarity is outside [0.0, 1.0].
//
// A bare marker (SHIT, i.e. no number) means "fuzzy with the default
// similarity fuzzyMinSim". The boost token is stored in the parser-level
// `boost` variable; it is not applied here.
Query smpTerm(String field):
{
  Token term, fuzzySlop=null;
  boolean prefix = false;
  boolean wildcard = false;
  boolean fuzzy = false;
  Query q;
}
{
  (
    (
         term=<TERM>
       | term=<STAR> { wildcard=true; }
       | term=<PREFIXTERM> { prefix=true; }
       | term=<WILDTERM> { wildcard=true; }
       | term=<NUMBER>
     )
     [ (fuzzySlop=<FUZZY_SLOP>| < SHIT>) { fuzzy=true; } ]
     // A lone "~", "#" or "%" lexes as SHIT rather than FUZZY_SLOP, so the
     // bare marker is accepted after the boost as well as before it.
     [ <CARAT> boost=<NUMBER> [ (fuzzySlop=<FUZZY_SLOP>| < SHIT>) { fuzzy=true; } ] ]
     {
       String termImage=discardEscapeChar(term.image);
       if (wildcard) {
         q = getWildcardQuery(field, termImage);
       } else if (prefix) {
         // Drop the trailing "*" before unescaping.
         q = getPrefixQuery(field,
           discardEscapeChar(term.image.substring
          (0, term.image.length()-1)));
       } else if (fuzzy) {
         float fms = fuzzyMinSim;
         // fuzzySlop is null when the bare SHIT marker was used; keep the
         // default explicitly instead of relying on a swallowed
         // NullPointerException.
         if (fuzzySlop != null) {
           try {
             fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
           } catch (NumberFormatException ignored) {
             // No usable number after the marker: keep the default.
           }
         }
         if(fms < 0.0f || fms > 1.0f){
           throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
         }
         q = getFuzzyQuery(field, termImage,fms);
       } else {
         q = hasNewAPI ? getFieldQuery(field, termImage, false) : getFieldQuery(field, termImage);
       }
     }
  )
  {
    return q;
  }
}

// Parses a double-quoted phrase, optionally followed by a FUZZY_SLOP token
// (used as the phrase slop) and by "^" NUMBER.
// The phrase text (quotes removed, escapes discarded) is passed to
// getFieldQuery(field, text, slop). The slop defaults to phraseSlop and is
// replaced by the integer part of the number in the FUZZY_SLOP token when
// that number can be parsed.
// The boost token is stored in the parser-level `boost` variable; it is not
// applied here.
Query quotedTerm(String field):
{
  Token phrase;
  Token slopToken = null;
}
{
  phrase=<QUOTED>
  ( slopToken=<FUZZY_SLOP> )?
  ( <CARAT> boost=<NUMBER> )?
  {
    int slop = phraseSlop;
    if (slopToken != null) {
      try {
        // Skip the leading marker character, then truncate to an int.
        slop = Float.valueOf(slopToken.image.substring(1)).intValue();
      } catch (Exception ignored) {
        // Unparsable slop: keep the default.
      }
    }
    String inner = phrase.image.substring(1, phrase.image.length()-1);
    return getFieldQuery(field, discardEscapeChar(inner), slop);
  }
}

// Parses a comparison of the form  TERM ">" TERM  or  TERM "<" TERM,
// optionally followed by "^" NUMBER, and turns it into a range query:
//   name > value  ->  getRangeQuery(name, value, Long.MAX_VALUE, true)
//   name < value  ->  getRangeQuery(name, "0", value, true)
//
// field: not used; the first TERM supplies the field name.
// Returns the range query.
//
// The boost token is stored in the parser-level `boost` variable (as the
// other term productions do). It must not be redeclared locally here,
// because a local would shadow that variable and the boost would be lost.
// NOTE(review): both comparisons pass `true` as the last argument to
// getRangeQuery, as rangeTerm does for its inclusive form, so ">" and "<"
// appear to include the bound itself - confirm this is intended.
Query smpRange(String field):
{
   Token term1,term2;
   Query q;
   Token mark;
}
{
  term1=< TERM >
  (mark=< GREATER>|mark=< LESS>)
  term2=< TERM >
  [ <CARAT> boost=<NUMBER> ]
  {
     // Compare token kinds rather than regex-matching the operator's image.
     if(mark.kind == GREATER)
     {
       q = getRangeQuery(term1.image.trim(),term2.image.trim(),String.valueOf(Long.MAX_VALUE),true);
     }else
     {
       q = getRangeQuery(term1.image.trim(),String.valueOf(0),term2.image.trim(),true);
     }
    return q;
  }
}

// Parses one term-level expression: a comparison (smpRange), a simple term
// (smpTerm), a bracketed range (rangeTerm) or a quoted phrase (quotedTerm).
// Two tokens of lookahead decide between smpRange and smpTerm.
//
// Afterwards, any boost token left in the parser-level `boost` variable by
// the sub-production is applied to the resulting query, and `boost` is
// cleared.
Query Term(String field) : {
  Query q;
}
{
  (
      (
          LOOKAHEAD(2)
          q = smpRange(field)
        | q = smpTerm(field)
      )
    | q = rangeTerm(field)
    | q = quotedTerm(field)
  )
  {
    // Null queries (e.g. those caused by stop words) are never boosted.
    if (boost != null && q != null) {
      float factor = 1.0f;
      try {
        factor = Float.valueOf(boost.image).floatValue();
      } catch (Exception ignored) {
        // An invalid boost number falls back to "no boost" (1.0).
      }
      q.setBoost(factor);
    }
    boost = null;
    return q;
  }
  
}

發佈了92 篇原創文章 · 獲贊 3 · 訪問量 21萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章