從這裏轉載的免cygwin命令行:http://hi.baidu.com/haojielyb/blog/item/778318306ecb809da8018ecb.html
多謝這位大牛了。寫得很好。但在nutch 1.0中測試無效,錯不在這些bat,在Hadoop。
nutch window 版 bat 命令 免 cygwin 中文
0.7.2=============
@echo off
rem ---------------------------------------------------------------------------
rem nutch.bat - Cygwin-free Windows launcher for Nutch 0.7.2.
rem
rem Usage: nutch COMMAND [up to eight further arguments]
rem
rem Without a COMMAND the help text below is printed. Otherwise COMMAND is
rem mapped to a Nutch class name (an unrecognised COMMAND is used as the class
rem name itself) and that class is started with java; arguments two through
rem nine are passed on unchanged.
rem ---------------------------------------------------------------------------
set JAVA_HEAP_MAX=-Xmx512M
rem The tilde form strips surrounding quotes so a quoted argument cannot break
rem the comparison syntax.
if not "%~1"=="" goto INIT

:echoMSG
echo 李永波製作
echo Nutch Version: 0.7.2
echo Usage: nutch COMMAND
echo where COMMAND is one of:
echo crawl one-step crawler for intranets 一步式抓取企業內部網
echo admin database administration, including creation 數據庫管理(包括建立)
echo inject inject new urls into the database 添加新的 網站url 到數據庫
echo generate generate new segments to fetch 生成新的獲取數據
echo fetchlist print the fetchlist of a segment 打印數據的存取列表
echo fetch fetch a segment's pages 存取數據段的頁
echo parse parse a segment's pages 解析數據段的頁
echo index run the indexer on a segment's fetcher output 在段的存取輸出上運行索引
echo merge merge several segment indexes 合併幾個數據段的索引
echo dedup remove duplicates from a set of segment indexes 從數據段集刪除重複的索引
echo updatedb update db from segments after fetching 在獲取後從數據段中更新數據庫
echo updatesegs update segments with link data from the db 從數據庫中更新數據段和鏈接數據
echo mergesegs merge multiple segments into a single segment 合併多重數據段成一個單一的部分
echo readdb examine arbitrary fields of the database 審查數據庫任意字段
echo analyze adjust database link-analysis scoring 調整數據庫連接,分析得分
echo prune prune segment index(es) of unwanted content 修剪部分索引不想要的內容
echo segread read, fix and dump segment data 閱讀,修理和丟棄數據段
echo segslice append, join and slice segment data 附錄,加入和切片部分數據
echo server run a search server 運行搜索的服務器
echo namenode run the NDFS namenode 運行 NDFS 名稱 節點
echo datanode run an NDFS datanode 運行ndfs 數據節點
echo ndfs run an NDFS admin client 運行NDFS 管理節點
echo jobtracker run the MapReduce job Tracker node 運行mapreduce作業跟蹤節點
echo tasktracker run a MapReduce task Tracker node 運行mapreduce任務跟蹤節點
echo or
echo CLASSNAME run the class named CLASSNAME 運行指定類名的類
echo Most commands print help when invoked w/o parameters. 當參數錯誤時命令會打印幫助
goto end

:INIT
rem Installation directory - edit this line to match the local install.
set NUTCH_HOME=C:/work/nutch-0.7.2
rem Fallback used only when the line above is edited to leave the value empty.
if "%NUTCH_HOME%"=="" set NUTCH_HOME=..
set CLASSPATH=%NUTCH_HOME%;%NUTCH_HOME%/conf;%NUTCH_HOME%/plugin

rem Generate setclasspath.bat in the current directory with one
rem "set CLASSPATH=<old>;<jar>" line per jar. The doubled percent signs are
rem written out as single ones, so the variable is expanded only when the
rem generated script runs, which lets each line append to the previous value.
@echo @echo off>setclasspath.bat
rem The two loops are separate statements: chained with an ampersand the second
rem loop would become part of the first loop's body and run once per nutch jar
rem (and not at all when no nutch jar matches).
for %%i in (%NUTCH_HOME%/nutch-*.jar) do @echo set CLASSPATH=%%CLASSPATH%%;%%i>>setclasspath.bat
for %%i in (%NUTCH_HOME%/lib/*.jar) do @echo set CLASSPATH=%%CLASSPATH%%;%%i>>setclasspath.bat

:EXEC
call setclasspath

rem Default: an unrecognised COMMAND is taken to be a class name, as promised
rem by the CLASSNAME entry of the help text. Known commands override it below.
set CLASS=%~1
if "%~1"=="crawl" set CLASS=org.apache.nutch.tools.CrawlTool
if "%~1"=="admin" set CLASS=org.apache.nutch.tools.WebDBAdminTool
if "%~1"=="inject" set CLASS=org.apache.nutch.db.WebDBInjector
if "%~1"=="generate" set CLASS=org.apache.nutch.tools.FetchListTool
if "%~1"=="fetchlist" set CLASS=org.apache.nutch.pagedb.FetchListEntry
if "%~1"=="fetch" set CLASS=org.apache.nutch.fetcher.Fetcher
if "%~1"=="parse" set CLASS=org.apache.nutch.tools.ParseSegment
if "%~1"=="index" set CLASS=org.apache.nutch.indexer.IndexSegment
if "%~1"=="merge" set CLASS=org.apache.nutch.indexer.IndexMerger
if "%~1"=="dedup" set CLASS=org.apache.nutch.indexer.DeleteDuplicates
if "%~1"=="updatedb" set CLASS=org.apache.nutch.tools.UpdateDatabaseTool
if "%~1"=="updatesegs" set CLASS=org.apache.nutch.tools.UpdateSegmentsFromDb
if "%~1"=="mergesegs" set CLASS=org.apache.nutch.tools.SegmentMergeTool
if "%~1"=="readdb" set CLASS=org.apache.nutch.db.WebDBReader
if "%~1"=="prune" set CLASS=org.apache.nutch.tools.PruneIndexTool
if "%~1"=="segread" set CLASS=org.apache.nutch.segment.SegmentReader
if "%~1"=="segslice" set CLASS=org.apache.nutch.segment.SegmentSlicer
if "%~1"=="analyze" set CLASS=org.apache.nutch.tools.LinkAnalysisTool
if "%~1"=="server" set CLASS=org.apache.nutch.searcher.DistributedSearch$Server
if "%~1"=="namenode" set CLASS=org.apache.nutch.ndfs.NDFS$NameNode
if "%~1"=="datanode" set CLASS=org.apache.nutch.ndfs.NDFS$DataNode
if "%~1"=="ndfs" set CLASS=org.apache.nutch.fs.TestClient
if "%~1"=="jobtracker" set CLASS=org.apache.nutch.mapReduce.JobTracker
if "%~1"=="tasktracker" set CLASS=org.apache.nutch.mapReduce.TaskTracker

rem The classpath is quoted so that an install path containing spaces stays a
rem single argument.
java %JAVA_HEAP_MAX% -classpath "%CLASSPATH%" %CLASS% %2 %3 %4 %5 %6 %7 %8 %9
:end
0.8.1 or 0.9
=============
@echo off
rem ---------------------------------------------------------------------------
rem nutch.bat - Cygwin-free Windows launcher for Nutch 0.8.1 / 0.9.
rem
rem Usage: nutch COMMAND [up to eight further arguments]
rem
rem Without a COMMAND the help text below is printed. Otherwise COMMAND is
rem mapped to a Nutch class name (an unrecognised COMMAND is used as the class
rem name itself) and that class is started with java; arguments two through
rem nine are passed on unchanged.
rem ---------------------------------------------------------------------------
set JAVA_HEAP_MAX=-Xmx512M
rem The tilde form strips surrounding quotes so a quoted argument cannot break
rem the comparison syntax.
if not "%~1"=="" goto INIT

:echoMSG
echo Title:歡迎使用北京線點科技 Nutch 運行腳本
echo Author:jaddy0302 mail:[email protected] QQ:5622928
echo Site:http://www.xd-tech.com.cn 線點科技 專業垂直搜索引擎產品
rem The banner previously still said 0.7.2, copied from the older script.
echo Nutch Version: 0.8.1 or 0.9
echo Usage: nutch COMMAND
echo where COMMAND is one of:
echo crawl one-step crawler for intranets 一步式抓取企業內部網
echo inject inject new urls into the database 添加新的 網站url 到數據庫
echo generate generate new segments to fetch 生成新的獲取數據
echo fetch fetch a segment's pages 存取數據段的頁
echo parse parse a segment's pages 解析數據段的頁
ECHO mergedb merge crawldb-s, with optional filtering 合併幾個數據段的索引
echo dedup remove duplicates from a set of segment indexes 從數據段集刪除重複的索引
echo updatedb update db from segments after fetching 在獲取後從數據段中更新數據庫
echo updatesegs update segments with link data from the db 從數據庫中更新數據段和鏈接數據
echo mergesegs merge multiple segments into a single segment 合併多重數據段成一個單一的部分
echo readdb examine arbitrary fields of the database 審查數據庫任意字段
echo segread read, fix and dump segment data 閱讀,修理和丟棄數據段
echo readlinkdb read / dump link db 從數據段集刪除重複的索引
echo readseg read / dump segment data
echo invertlinks create a linkdb from parsed segments
echo mergelinkdb merge( 合併) linkdb-s, with optional filtering
echo index run the indexer on parsed segments and linkdb
echo merge merge several segment indexes 合併幾個數據段的索引
echo segslice append, join and slice segment data 附錄,加入和切片部分數據
echo plugin load a plugin and run one of its classes main() 加載 插件
echo server run a search server 運行搜索的服務器
echo or
echo CLASSNAME run the class named CLASSNAME 運行指定類名的類
echo Most commands print help when invoked w/o parameters. 當參數錯誤時命令會打印幫助
goto end

:INIT
rem Installation directory - edit the set line below to match the local
rem install; the ECHO reminds the user of this on every run.
ECHO 請注意修改此處 set NUTCH_HOME=e:/cyg/Nutch
set NUTCH_HOME=e:/cyg/Nutch
rem Fallback used only when the line above is edited to leave the value empty.
if "%NUTCH_HOME%"=="" set NUTCH_HOME=..
set CLASSPATH=%NUTCH_HOME%;%NUTCH_HOME%/conf;%NUTCH_HOME%/plugin

rem Generate setclasspath.bat in the current directory with one
rem "set CLASSPATH=<old>;<jar>" line per jar. The doubled percent signs are
rem written out as single ones, so the variable is expanded only when the
rem generated script runs, which lets each line append to the previous value.
@echo @echo off>setclasspath.bat
rem The two loops are separate statements: chained with an ampersand the second
rem loop would become part of the first loop's body and run once per nutch jar
rem (and not at all when no nutch jar matches).
for %%i in (%NUTCH_HOME%/nutch-*.jar) do @echo set CLASSPATH=%%CLASSPATH%%;%%i>>setclasspath.bat
for %%i in (%NUTCH_HOME%/lib/*.jar) do @echo set CLASSPATH=%%CLASSPATH%%;%%i>>setclasspath.bat

:EXEC
call setclasspath

rem Default: an unrecognised COMMAND is taken to be a class name, as promised
rem by the CLASSNAME entry of the help text. Known commands override it below.
set CLASS=%~1
if "%~1"=="crawl" set CLASS=org.apache.nutch.crawl.Crawl
rem NOTE(review): "admin" is not listed in the help text above and its class
rem looks like a leftover from the 0.7.2 script - confirm it exists in 0.8.1/0.9.
if "%~1"=="admin" set CLASS=org.apache.nutch.tools.WebDBAdminTool
rem The assignment below previously lacked its equals sign, so "inject" never
rem set CLASS.
if "%~1"=="inject" set CLASS=org.apache.nutch.crawl.Injector
if "%~1"=="generate" set CLASS=org.apache.nutch.crawl.Generator
if "%~1"=="fetch" set CLASS=org.apache.nutch.fetcher.Fetcher
if "%~1"=="parse" set CLASS=org.apache.nutch.parse.ParseSegment
if "%~1"=="readdb" set CLASS=org.apache.nutch.crawl.CrawlDbReader
if "%~1"=="mergedb" set CLASS=org.apache.nutch.crawl.CrawlDbMerger
if "%~1"=="readlinkdb" set CLASS=org.apache.nutch.crawl.LinkDbReader
if "%~1"=="readseg" set CLASS=org.apache.nutch.segment.SegmentReader
if "%~1"=="segread" set CLASS=org.apache.nutch.segment.SegmentReader
if "%~1"=="segread" echo [DEPRECATED] Command [segread] is deprecated, use [readseg] instead.
if "%~1"=="mergesegs" set CLASS=org.apache.nutch.segment.SegmentMerger
if "%~1"=="updatedb" set CLASS=org.apache.nutch.crawl.CrawlDb
if "%~1"=="invertlinks" set CLASS=org.apache.nutch.crawl.LinkDb
if "%~1"=="mergelinkdb" set CLASS=org.apache.nutch.crawl.LinkDbMerger
if "%~1"=="index" set CLASS=org.apache.nutch.indexer.Indexer
if "%~1"=="dedup" set CLASS=org.apache.nutch.indexer.DeleteDuplicates
if "%~1"=="merge" set CLASS=org.apache.nutch.indexer.IndexMerger
if "%~1"=="plugin" set CLASS=org.apache.nutch.plugin.PluginRepository
if "%~1"=="server" set CLASS=org.apache.nutch.searcher.DistributedSearch$Server

rem Show the command about to run. The doubled percent signs print the literal
rem variable name instead of the full expanded classpath.
ECHO java %JAVA_HEAP_MAX% -classpath %%CLASSPATH%% %CLASS% %2 %3 %4 %5 %6 %7 %8 %9
rem The classpath is quoted so that an install path containing spaces stays a
rem single argument.
java %JAVA_HEAP_MAX% -classpath "%CLASSPATH%" %CLASS% %2 %3 %4 %5 %6 %7 %8 %9
:end
將以上內容拷貝並分別存爲 .bat 文件即可。注意:以上是兩個不同版本的腳本(0.7.2 與 0.8.1/0.9),請按所用的 Nutch 版本選擇其一。