轉載:Windows下免cygwin運行Nutch

從這裏轉載的免cygwin命令行:http://hi.baidu.com/haojielyb/blog/item/778318306ecb809da8018ecb.html

多謝這位大牛了。寫得很好。但在nutch 1.0中測試無效,錯不在這些bat,在Hadoop。

Nutch Windows 版 bat 命令(免 Cygwin,中文說明)

0.7.2=============

@echo off
rem Nutch 0.7.2 launcher for Windows (no Cygwin needed).
rem Usage: nutch COMMAND [arguments...]
rem When no COMMAND is given, the usage text below is printed and the script
rem jumps to the :end label; otherwise control passes to :INIT.
set JAVA_HEAP_MAX=-Xmx512M
rem %~1 removes surrounding quotes so a quoted first argument compares cleanly.
rem cmd has no single-line "goto X else goto Y" form; falling through to
rem :echoMSG is the "else" branch.
if not "%~1"=="" goto INIT
:echoMSG
echo 李永波製作
echo Nutch Version: 0.7.2
echo Usage: nutch COMMAND
echo where COMMAND is one of:
echo   crawl             one-step crawler for intranets 一步式抓取企業內部網
echo   admin             database administration, including creation 數據庫管理(包括建立)
echo   inject            inject new urls into the database 添加新的 網站url 到數據庫
echo   generate          generate new segments to fetch    生成新的獲取數據
echo   fetchlist         print the fetchlist of a segment 打印數據的存取列表
echo   fetch             fetch a segment's pages 存取數據段的頁
echo   parse             parse a segment's pages 解析數據段的頁
echo   index             run the indexer on a segment's fetcher output 在段的存取輸出上運行索引
echo   merge             merge several segment indexes 合併幾個數據段的索引
echo   dedup             remove duplicates from a set of segment indexes 從數據段集刪除重複的索引
echo   updatedb          update db from segments after fetching 在獲取後從數據段中更新數據庫
echo   updatesegs        update segments with link data from the db 從數據庫中更新數據段和鏈接數據
echo   mergesegs         merge multiple segments into a single segment 合併多重數據段成一個單一的部分
echo   readdb            examine arbitrary fields of the database 審查數據庫任意字段
echo   analyze           adjust database link-analysis scoring 調整數據庫連接,分析得分
echo   prune             prune segment index(es) of unwanted content 修剪部分索引不想要的內容
echo   segread           read, fix and dump segment data 閱讀,修理和丟棄數據段
echo   segslice          append, join and slice segment data 附錄,加入和切片部分數據
echo   server            run a search server 運行搜索的服務器
echo   namenode          run the NDFS namenode 運行 NDFS 名稱 節點
echo   datanode          run an NDFS datanode 運行ndfs 數據節點
echo   ndfs              run an NDFS admin client 運行NDFS 管理節點
echo   jobtracker        run the MapReduce job Tracker node 運行mapreduce作業跟蹤節點
echo   tasktracker       run a MapReduce task Tracker node 運行mapreduce任務跟蹤節點
echo or
echo   CLASSNAME         run the class named CLASSNAME 運行指定類名的類
echo Most commands print help when invoked w/o parameters. 當參數錯誤時命令會打印幫助
goto end
:INIT
rem Installation directory. The assignment below is unconditional, so the
rem ".." fallback on the following line only takes effect if that assignment
rem is removed from the script.
set NUTCH_HOME=C:/work/nutch-0.7.2
if "%NUTCH_HOME%"=="" set NUTCH_HOME=..
set CLASSPATH=%NUTCH_HOME%;%NUTCH_HOME%/conf;%NUTCH_HOME%/plugin
rem Generate setclasspath.bat in the current directory: one
rem "set CLASSPATH=%CLASSPATH%;<jar>" line per jar. %%CLASSPATH%% is written
rem to the file as a literal %CLASSPATH%, so it is expanded only when the
rem generated file is run later.
echo @echo off>setclasspath.bat
rem The two loops are separate statements on purpose. Joined with "&" on one
rem line, the second loop becomes part of the first loop's body and runs once
rem per nutch-*.jar match (never if none matches, repeatedly if several do).
for %%i in (%NUTCH_HOME%/nutch-*.jar) do echo set CLASSPATH=%%CLASSPATH%%;%%i>>setclasspath.bat
for %%i in (%NUTCH_HOME%/lib/*.jar) do echo set CLASSPATH=%%CLASSPATH%%;%%i>>setclasspath.bat
goto EXEC
:EXEC
rem Run the generated helper so that every jar is appended to CLASSPATH.
call setclasspath
rem Default: treat the first argument as a class name (the CLASSNAME form
rem listed in the usage text). Assigning here also discards any CLASS value
rem left in the environment by an earlier run in the same console.
set CLASS=%~1
rem Map the known command names to their implementing classes.
if "%~1" == "crawl" set CLASS=org.apache.nutch.tools.CrawlTool
if "%~1" == "admin" set CLASS=org.apache.nutch.tools.WebDBAdminTool
if "%~1" == "inject" set CLASS=org.apache.nutch.db.WebDBInjector
if "%~1" == "generate" set CLASS=org.apache.nutch.tools.FetchListTool
if "%~1" == "fetchlist" set CLASS=org.apache.nutch.pagedb.FetchListEntry
if "%~1" == "fetch" set CLASS=org.apache.nutch.fetcher.Fetcher
if "%~1" == "parse" set CLASS=org.apache.nutch.tools.ParseSegment
if "%~1" == "index" set CLASS=org.apache.nutch.indexer.IndexSegment
if "%~1" == "merge" set CLASS=org.apache.nutch.indexer.IndexMerger
if "%~1" == "dedup" set CLASS=org.apache.nutch.indexer.DeleteDuplicates
if "%~1" == "updatedb" set CLASS=org.apache.nutch.tools.UpdateDatabaseTool
if "%~1" == "updatesegs" set CLASS=org.apache.nutch.tools.UpdateSegmentsFromDb
if "%~1" == "mergesegs" set CLASS=org.apache.nutch.tools.SegmentMergeTool
if "%~1" == "readdb" set CLASS=org.apache.nutch.db.WebDBReader
if "%~1" == "prune" set CLASS=org.apache.nutch.tools.PruneIndexTool
if "%~1" == "segread" set CLASS=org.apache.nutch.segment.SegmentReader
if "%~1" == "segslice" set CLASS=org.apache.nutch.segment.SegmentSlicer
if "%~1" == "analyze" set CLASS=org.apache.nutch.tools.LinkAnalysisTool
if "%~1" == "server" set CLASS=org.apache.nutch.searcher.DistributedSearch$Server
if "%~1" == "namenode" set CLASS=org.apache.nutch.ndfs.NDFS$NameNode
if "%~1" == "datanode" set CLASS=org.apache.nutch.ndfs.NDFS$DataNode
if "%~1" == "ndfs" set CLASS=org.apache.nutch.fs.TestClient
if "%~1" == "jobtracker" set CLASS=org.apache.nutch.mapReduce.JobTracker
if "%~1" == "tasktracker" set CLASS=org.apache.nutch.mapReduce.TaskTracker
rem CLASSPATH was updated by the call above, and this line is parsed after
rem that call returns, so a plain %CLASSPATH% expands to the full value.
rem The quotes keep entries containing spaces together as one argument.
rem Only arguments 2 through 9 are forwarded to the Java class.
java %JAVA_HEAP_MAX% -classpath "%CLASSPATH%" %CLASS% %2 %3 %4 %5 %6 %7 %8 %9
:end

0.8.1 or 0.9

=============

@echo off
rem Nutch 0.8.1 / 0.9 launcher for Windows (no Cygwin needed).
rem Usage: nutch COMMAND [arguments...]
rem When no COMMAND is given, the usage text below is printed and the script
rem jumps to the :end label; otherwise control passes to :INIT.
set JAVA_HEAP_MAX=-Xmx512M
rem %~1 removes surrounding quotes so a quoted first argument compares cleanly.
rem cmd has no single-line "goto X else goto Y" form; falling through to
rem :echoMSG is the "else" branch.
if not "%~1"=="" goto INIT
:echoMSG
echo Title:歡迎使用北京線點科技 Nutch 運行腳本
echo Author:jaddy0302 mail:[email protected] QQ:5622928
echo Site:http://www.xd-tech.com.cn 線點科技 專業垂直搜索引擎產品
echo Nutch Version: 0.8.1 / 0.9
echo Usage: nutch COMMAND
echo where COMMAND is one of:
rem NOTE(review): updatesegs and segslice are listed below, but the :EXEC
rem section of this script has no class mapping for either of them.
echo   crawl             one-step crawler for intranets 一步式抓取企業內部網
echo   inject            inject new urls into the database 添加新的 網站url 到數據庫
echo   generate          generate new segments to fetch    生成新的獲取數據
echo   fetch             fetch a segment's pages 存取數據段的頁
echo   parse             parse a segment's pages 解析數據段的頁
ECHO   mergedb           merge crawldb-s, with optional filtering 合併多個 crawldb,可選過濾
echo   dedup             remove duplicates from a set of segment indexes 從數據段集刪除重複的索引
echo   updatedb          update db from segments after fetching 在獲取後從數據段中更新數據庫
echo   updatesegs        update segments with link data from the db 從數據庫中更新數據段和鏈接數據
echo   mergesegs         merge multiple segments into a single segment 合併多重數據段成一個單一的部分
echo   readdb            examine arbitrary fields of the database 審查數據庫任意字段
echo   segread           read, fix and dump segment data 閱讀,修理和丟棄數據段
echo   readlinkdb        read / dump link db 讀取 / 導出鏈接數據庫
echo   readseg           read / dump segment data
echo   invertlinks       create a linkdb from parsed segments
echo   mergelinkdb       merge( 合併) linkdb-s, with optional filtering
echo   index             run the indexer on parsed segments and linkdb
echo   merge             merge several segment indexes 合併幾個數據段的索引
echo   segslice          append, join and slice segment data 附錄,加入和切片部分數據
echo   plugin            load a plugin and run one of its classes main() 加載 插件
echo   server            run a search server 運行搜索的服務器
echo or
echo   CLASSNAME         run the class named CLASSNAME 運行指定類名的類
echo Most commands print help when invoked w/o parameters. 當參數錯誤時命令會打印幫助
goto end
:INIT
rem Prints a reminder (on every run) that the hard-coded path below must be edited.
ECHO 請注意修改此處   set NUTCH_HOME=e:/cyg/Nutch
rem Installation directory. The assignment below is unconditional, so the
rem ".." fallback on the following line only takes effect if that assignment
rem is removed from the script.
set NUTCH_HOME=e:/cyg/Nutch
if "%NUTCH_HOME%"=="" set NUTCH_HOME=..
set CLASSPATH=%NUTCH_HOME%;%NUTCH_HOME%/conf;%NUTCH_HOME%/plugin
rem Generate setclasspath.bat in the current directory: one
rem "set CLASSPATH=%CLASSPATH%;<jar>" line per jar. %%CLASSPATH%% is written
rem to the file as a literal %CLASSPATH%, so it is expanded only when the
rem generated file is run later.
echo @echo off>setclasspath.bat
rem The two loops are separate statements on purpose. Joined with "&" on one
rem line, the second loop becomes part of the first loop's body and runs once
rem per nutch-*.jar match (never if none matches, repeatedly if several do).
for %%i in (%NUTCH_HOME%/nutch-*.jar) do echo set CLASSPATH=%%CLASSPATH%%;%%i>>setclasspath.bat
for %%i in (%NUTCH_HOME%/lib/*.jar) do echo set CLASSPATH=%%CLASSPATH%%;%%i>>setclasspath.bat
goto EXEC
:EXEC
rem Run the generated helper so that every jar is appended to CLASSPATH.
call setclasspath
rem Default: treat the first argument as a class name (the CLASSNAME form
rem listed in the usage text). Assigning here also discards any CLASS value
rem left in the environment by an earlier run in the same console.
set CLASS=%~1
rem Map the known command names to their implementing classes.
if "%~1" == "crawl" set CLASS=org.apache.nutch.crawl.Crawl
rem NOTE(review): "admin" is not listed in this script's usage text; confirm
rem that this class exists in the targeted Nutch version.
if "%~1" == "admin" set CLASS=org.apache.nutch.tools.WebDBAdminTool
rem The "=" is required: "set CLASSorg..." without it assigns nothing.
if "%~1" == "inject" set CLASS=org.apache.nutch.crawl.Injector
if "%~1" == "generate" set CLASS=org.apache.nutch.crawl.Generator
if "%~1" == "fetch" set CLASS=org.apache.nutch.fetcher.Fetcher
if "%~1" == "parse" set CLASS=org.apache.nutch.parse.ParseSegment

if "%~1" == "readdb" set CLASS=org.apache.nutch.crawl.CrawlDbReader
rem No blank after "=": "set" would store it as part of the value.
if "%~1" == "mergedb" set CLASS=org.apache.nutch.crawl.CrawlDbMerger
if "%~1" == "readlinkdb" set CLASS=org.apache.nutch.crawl.LinkDbReader

if "%~1" == "readseg" set CLASS=org.apache.nutch.segment.SegmentReader
if "%~1" == "segread" set CLASS=org.apache.nutch.segment.SegmentReader
if "%~1" == "segread" echo [DEPRECATED] Command [segread] is deprecated, use [readseg] instead.
if "%~1" == "mergesegs" set CLASS=org.apache.nutch.segment.SegmentMerger
if "%~1" == "updatedb" set CLASS=org.apache.nutch.crawl.CrawlDb
if "%~1" == "invertlinks" set CLASS=org.apache.nutch.crawl.LinkDb
if "%~1" == "mergelinkdb" set CLASS=org.apache.nutch.crawl.LinkDbMerger
if "%~1" == "index" set CLASS=org.apache.nutch.indexer.Indexer
if "%~1" == "dedup" set CLASS=org.apache.nutch.indexer.DeleteDuplicates
if "%~1" == "merge" set CLASS=org.apache.nutch.indexer.IndexMerger
if "%~1" == "plugin" set CLASS=org.apache.nutch.plugin.PluginRepository
if "%~1" == "server" set CLASS=org.apache.nutch.searcher.DistributedSearch$Server
rem Show the command about to be run; %%CLASSPATH%% prints as a literal
rem %CLASSPATH% placeholder rather than the full expanded jar list.
ECHO java %JAVA_HEAP_MAX% -classpath %%CLASSPATH%% %CLASS% %2 %3 %4 %5 %6 %7 %8 %9
rem CLASSPATH was updated by the call above, and this line is parsed after
rem that call returns, so a plain %CLASSPATH% expands to the full value.
rem The quotes keep entries containing spaces together as one argument.
rem Only arguments 2 through 9 are forwarded to the Java class.
java %JAVA_HEAP_MAX% -classpath "%CLASSPATH%" %CLASS% %2 %3 %4 %5 %6 %7 %8 %9
:end

將以上內容拷貝並存爲 .bat 文件即可。注意:以上是兩個不同版本的腳本,請分別保存。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章