In most situations you use the TWebBrowser to display HTML documents to the user - thus creating your own version of the (Internet Explorer) Web browser.
A very nice feature of a Browser is to display link information, for example, in the status bar, when the mouse hovers over a link in a document. This can also be done in Delphi: Get the Url of a Hyperlink when the Mouse moves Over a TWebBrowser Document.
Sometimes, you "only" want to extract all the links from a HTML document / URL. You want to get the HREF attribute of all A tags.
Here's how to extract all hyperlinks from an HTML document. The ExtractLinks procedure fills a TStrings object with the value of the HREF attribute of the A HTML element.
uses mshtml, ActiveX, COMObj, IdHTTP, idURI;
//extract "href" attribute from A tags from an URL - into a TStrings
procedure ExtractLinks(const url: String; const strings: TStrings) ;
var
iDoc : IHTMLDocument2;
strHTML : string;
v : Variant;
x : integer;
links : OleVariant;
docURL : string;
URI : TidURI;
aHref : string;
idHTTP : TidHTTP;
begin
strings.Clear;
URI := TidURI.Create(url) ;
try
docURL := 'http://' + URI.Host;
if URI.Path <> '/' then docURL := docURL + URI.Path;
finally
URI.Free;
end;
iDoc := CreateComObject(Class_HTMLDOcument) as IHTMLDocument2;
try
iDoc.designMode := 'on';
while iDoc.readyState <> 'complete' do Application.ProcessMessages;
v := VarArrayCreate([0,0],VarVariant) ;
idHTTP := TidHTTP.Create(nil) ;
try
strHTML := idHTTP.Get(url) ;
finally
idHTTP.Free;
end;
v[0]:= strHTML;
iDoc.write(PSafeArray(System.TVarData(v).VArray)) ;
iDoc.designMode := 'off';
while iDoc.readyState<>'complete' do Application.ProcessMessages;
links := iDoc.all.tags('A') ;
if links.Length > 0 then
begin
for x := 0 to -1 + links.Length do
begin
aHref := links.Item(x).href;
if (aHref[1] = '/') then
aHref := docURL + aHref
else if Pos('about:', aHref) = 1
then aHref := docURL + Copy(aHref, 7, Length(aHref)) ;
strings.Add(aHref) ;
end;
end;
finally
iDoc := nil;
end;
end;