索引

3/8/2017 来源:ASP.NET技巧 人气:2969

package lucene.incrementindex; import java.io.File; import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import java.util.List; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import lucene.util.Page; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.PRefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.wltea.analyzer.lucene.IKAnalyzer; import ci.page.Utils; public class SearchServlet extends HttpServlet { private static final long serialVersionUID = 1L; private static Analyzer analyzer; Page<Document> page; private static String radio; public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { String queryString = new String(request.getParameter("searchvalue").getBytes("ISO8859-1"), "UTF-8"); radio = 
Utils.getParaStr(request, "radio", ""); if(queryString.equals("")){ request.setAttribute("radio", radio); request.getRequestDispatcher("/lucene/resultlist.jsp").forward(request, response); return; } String fieldName = "INFONAME"; int currentPage = Utils.getParaInt(request, "currentPage", 1); int pageSize = 10; analyzer = new IKAnalyzer(true); BooleanQuery query = new BooleanQuery(); TokenStream tokenStream = analyzer.tokenStream("INFONAME", new StringReader(queryString)); ArrayList<String> analyzerKeys = new ArrayList<String>(); while(tokenStream.incrementToken()){ CharTermAttribute term = tokenStream.getAttribute(CharTermAttribute.class); analyzerKeys.add(term.toString()); } BooleanQuery queryField = new BooleanQuery(); for(String analyzerKey : analyzerKeys){ PrefixQuery Prefixquery = new PrefixQuery(new Term (fieldName, analyzerKey)); queryField.add(Prefixquery, Occur.SHOULD); } query.add(queryField, Occur.SHOULD); String savePath = getServletConfig().getServletContext().getRealPath("luceneindex"); File file = new File(savePath); Directory mdDirectory = FSDirectory.open(file); IndexReader reader = IndexReader.open(mdDirectory); IndexSearcher searcher = new IndexSearcher(reader); int totalRecord = searchTotalRecord(searcher,query); try { page = pageQuery(query,fieldName, queryString, searcher , currentPage, pageSize); if(page == null || page.getItems() == null || page.getItems().size() == 0) { request.setAttribute("error","很抱歉没有找到“"+queryString+"”相关");//当前页数 }else{ List list = new ArrayList(); for(Document doc : page.getItems()) { Mddata md = new Mddata(); String ID = doc.get("ID"); String PUBLISHERNAME = doc.get("PUBLISHERNAME"); String INFONAME = doc.get("INFONAME"); String LINKURL = doc.get("LINKURL"); String METADATADATE = doc.get("METADATADATE"); String TVALUE = doc.get("TVALUE"); String ZHNAME = doc.get("ZHNAME"); String SITENAME = doc.get("SITENAME"); String title = displayHtmlHighlight(query, fieldName, INFONAME, 300); String content = 
displayHtmlHighlight(query, "TVALUE", TVALUE, TVALUE.length()); title = title==null ? INFONAME : title; content = content==null ? TVALUE : content; if(title!=null){ md.setPublishername(PUBLISHERNAME); md.setInfoname(INFONAME); md.setLinkurl(LINKURL); md.setMetadatadate(METADATADATE); md.setTitle(title); md.setContent(content); md.setZhname(ZHNAME); md.setSitename(SITENAME); list.add(md); } } request.setAttribute("Height", list); request.setAttribute("radio", radio); request.setAttribute("totalRecord", totalRecord); } Collection<Integer> items = new ArrayList<Integer>(); for(int i=0; i < totalRecord; i++) { items.add(new Integer(i)); } Page<Integer> pag = new Page<Integer>(page.getCurrentPage(), page.getPageSize(),items,10); pag.setTotalRecord(totalRecord); int totalPage = pag.getTotalPage(); String[] pageRange = pag.getPageRange(); request.setAttribute("currentPage", page.getCurrentPage());//当前页数 request.setAttribute("pageRange", pageRange); request.setAttribute("totalPage", totalPage); request.setAttribute("queryString", queryString); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } request.getRequestDispatcher("/lucene/resultlist.jsp").forward(request, response); } public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { doGet(request, response); } /** * Lucene分页查询 * @param directoryPath * @param query * @param page * @throws IOException */ public static void pageQuery(IndexSearcher searcher,Query query,Page<Document> page) throws IOException { Sort sort = new Sort(new SortField("INFONAME", SortField.Type.SCORE), new SortField("METADATADATE", SortField.Type.STRING, true)); ScoreDoc AfterDoc = getLastScoreDoc(page.getCurrentPage(), page.getPageSize(), query, searcher); TopDocs topDocs = null; if(radio.equals("time")){ topDocs = searcher.searchAfter(AfterDoc,query, page.getPageSize(),sort); }else{ topDocs = searcher.searchAfter(AfterDoc,query, page.getPageSize()); } List<Document> docList = 
new ArrayList<Document>(); ScoreDoc[] docs = topDocs.scoreDocs; int index = 0; for (ScoreDoc scoreDoc : docs) { int docID = scoreDoc.doc; Document document = searcher.doc(docID); docList.add(document); index++; } page.setItems(docList); searcher.getIndexReader().close(); } /** * 索引分页查询 * @param fieldName * @param queries * @param currentPage * @param pageSize * @throws ParseException * @throws IOException */ public static Page<Document> pageQuery(BooleanQuery query,String fieldName,String queryString,IndexSearcher searcher,int currentPage,int pageSize) throws IOException { Page<Document> page = new Page<Document>(currentPage,pageSize); pageQuery(searcher, query, page); return page; } private static ScoreDoc getLastScoreDoc(int pageIndex,int pageSize,Query query,IndexSearcher searcher) throws IOException { Sort sort = new Sort(new SortField("INFONAME", SortField.Type.SCORE), new SortField("METADATADATE", SortField.Type.INT, true)); if(pageIndex==1)return null;//如果是第一页就返回空 int num = pageSize*(pageIndex-1);//获取上一页的数量 TopDocs tds = null; if(radio.equals("timer")){ tds = searcher.search(query, num, sort); }else{ tds = searcher.search(query, num); } return tds.scoreDocs[num-1]; } /** * @Title: searchTotalRecord * @Description: 获取符合条件的总记录数 * @param query * @return * @throws IOException */ public static int searchTotalRecord(IndexSearcher searcher,Query query) throws IOException { TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE); if(topDocs == null || topDocs.scoreDocs == null || topDocs.scoreDocs.length == 0) { return 0; } ScoreDoc[] docs = topDocs.scoreDocs; return docs.length; } static String displayHtmlHighlight(Query query , String fieldName, String fieldContent, int fragmentSize) throws IOException, InvalidTokenOffsetsException { //创建一个高亮器 Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color='red'>", "</font>"), new QueryScorer(query)); Fragmenter fragmenter = new SimpleFragmenter(fragmentSize); 
highlighter.setTextFragmenter(fragmenter); return highlighter.getBestFragment(analyzer, fieldName, fieldContent); } }