一個簡單(搜索EMail)的蜘蛛程序

 

unit UIDTcpClientThread;

interface
uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, IdBaseComponent, IdComponent, IdTCPConnection, IdTCPClient, IdHTTP,
  IniFiles;
 const
  MAXBUFF = 100000;

type
  TIDTcpClientThread = class(TThread)
  private
    bRun: Boolean;
  protected
   // Response2: TStringStream;
    StringList: TStringList;
    IdHTTPWeb: TIdHTTP;
    procedure Execute; override;
    procedure OutPutData();
    procedure ThreadExeProcedure();
    procedure AnalysisData();
  public
    URLBuffer: array[1..MAXBUFF] of string;
    iCurrentRecord: integer;
    iAlreadyRecord: integer;
    bBeginToTerminate: Boolean;
    constructor Create();
    destructor Destroy; override;
    procedure AppendURLToIDTcpClientThread(URL: string);
    procedure CloseThread();
    procedure OutPutURL(URL: string);
  end;

 

implementation

uses Unit1, UHandleURLThread;

{ TIDTcpClientThread }

procedure TIDTcpClientThread.ThreadExeProcedure;
var
  s: string;
begin
  if iCurrentRecord <> iAlreadyRecord then begin
    inc(iAlreadyRecord);
    if iAlreadyRecord > MAXBUFF then iAlreadyRecord := 1;
    try
     // Response2.Free;
      //Response2 := TStringStream.Create('');
      StringList.Clear;
      s := IdHTTPWeb.Get(URLBuffer[iAlreadyRecord]);
    //  s := Response2.DataString;
      //s := Utf8ToAnsi(s);
      StringList.Text := s;
      AnalysisData();

    except
      on E: Exception do begin
      //
      end;
    end;
  end;
end;

procedure TIDTcpClientThread.AppendURLToIDTcpClientThread(URL: string);
var
  iCurrentRecord_Temp: integer;
begin
  iCurrentRecord_Temp := iCurrentRecord;
  inc(iCurrentRecord_Temp);
  if iCurrentRecord_Temp > MAXBUFF then iCurrentRecord_Temp := 1;
  URLBuffer[iCurrentRecord_Temp] := URL;
  iCurrentRecord := iCurrentRecord_Temp;
end;

procedure TIDTcpClientThread.CloseThread;
begin

end;

constructor TIDTcpClientThread.Create;
begin
  inherited Create(false);
  StringList := TStringList.Create;
  IdHTTPWeb := TIdHTTP.Create(nil);

//  Response2 := TStringStream.Create('');
  IdHTTPWeb.HandleRedirects := true;
  IdHTTPWeb.Request.ContentType := 'application/x-www-form-urlencoded';

  StringList.Clear;

  bBeginToTerminate := true;
  bRun := false;
  iCurrentRecord := 1;
  iAlreadyRecord := 1;
end;

destructor TIDTcpClientThread.Destroy;
begin
  StringList.Clear;
  StringList.Free;
  IdHTTPWeb.Free;
  inherited;
end;

procedure TIDTcpClientThread.Execute;
begin
  inherited;
  while bBeginToTerminate do begin
    if not bRun then begin
      bRun := true;
      try
        sleep(10);
        ThreadExeProcedure;
      except
        on E: Exception do begin
        //
        end;
      end;

      bRun := false;
    end;
  end;

  bBeginToTerminate := true;
end;

procedure TIDTcpClientThread.OutPutData;
begin

end;

procedure TIDTcpClientThread.OutPutURL(URL: string);
begin
  EnterCriticalSection(CriticalSection);
  if assigned(HandleURLThread) then HandleURLThread.AppendURLToThread(URL);
  LeaveCriticalSection(CriticalSection);
end;
{var
  indexHashed: integer;
begin
  indexHashed := HashedStringList.IndexOf(URL);
  if indexHashed = -1 then begin
    HashedStringList.Add(URL);
    inc(indexofthread);
    if indexofthread >= MAXTHREAD then indexofthread := 0;
    IDTcpClientThread[indexofthread].AppendURLToIDTcpClientThread(URL);
   // Form1.Memo1.Lines.Add(URL);
  end;
end;
}
procedure TIDTcpClientThread.AnalysisData();
const
  HEADSTRING = 'href="';
  ENDSTRING = '"';
var
  ts, TempURL: string;
  i, j, sIndex, sEnd, sLen, ps: integer;
begin
//<a href=" ">
// " , http ,
//½ØÈ¡ Ö÷ÍøÒ³£¬Á½¸öhTTP
//<a href="mailto:[email protected]">[email protected]</a>
  for i := 0 to StringList.Count - 1 do begin
    ts := Lowercase(StringList.Strings[i]);
    sIndex := Pos(HEADSTRING, ts);
    sLen := Length(ts);
    while sIndex > 0 do begin
      delete(ts, 1, sIndex + Length(HEADSTRING) - 1);

      for j := 1 to MaxMailz do begin
        if Pos(Mailz[j], ts) > 0 then begin
          if assigned(SendEmailThread) then SendEmailThread.AppendURLToThread(ts);

          break;
        end;
      end;

      sEnd := Pos(ENDSTRING, ts);
      TempURL := Copy(ts, 1, sEnd - 1);
      if
        (Pos('.com', ts) > 0) or (Pos('.cn', ts) > 0) or (Pos('.mobi', ts) > 0) or (Pos('.tel', ts) > 0) or
        (Pos('.asia', ts) > 0) or (Pos('.net', ts) > 0) or (Pos('.org', ts) > 0) or (Pos('.name', ts) > 0) or
        (Pos('.me', ts) > 0) or (Pos('.tv', ts) > 0) or (Pos('.hk', ts) > 0) or (Pos('.biz', ts) > 0) or
        (Pos('.info', ts) > 0)
        then begin //.com.cn.mobi.tel.asia.net.org.name.me.tv.hk.biz.info
        OutPutURL(TempURL);
      end;

      delete(ts, 1, sEnd);
      sIndex := Pos(HEADSTRING, ts);
    end;
  end;
end;

end.

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章