"robots.txt" (http://www.mydomain.com/robots.txt). (-), , .
robots.txt - . . : (user-agent), , Disallow:
<> ":" <>
Robots.txt Unix. Windows Unix. FTP- . HTML-, , .
User-agent
User-agent . :
User-agent: googlebot
, "*":
User-agent: *
-. robots.txt. -.
Disallow:
Disallow. - . / . email.htm:
Disallow: email.htm
:
Disallow: /cgi-bin/
- "cgi-bin".
Disallow . , /bob /bob.html /bob/index.html.
Disallow , , . Disallow User-agent, robots.txt . robots.txt , .
robots.txt, #, . , :
Disallow: bob #comment
bob#comment. , .
, .
Disallow: bob #comment
, "*".
User-agent: *
Disallow:
:
User-agent: *
Disallow: /
"cgi-bin" "images":
User-agent: *
Disallow: /cgi-bin/
Disallow: /images/
Roverdog :
User-agent: Roverdog
Disallow: /
googlebot cheese.htm:
User-agent: googlebot
Disallow: cheese.htm
, robots.txt - , CNN Looksmart.
, Allow , .
robots.txt
robots.txt (. ), - "" . , robots.txt. , Open Directory Project. 2.4 URL robots.txt 75 .
robots.txt. , 5% robots.txt , 2% , . , :
- :
User-agent: *
Disallow: scooter
:
User-agent: scooter
Disallow: *
Disallow :
:
Disallow: /css/ /cgi-bin/ /images/
. /css//cgi-bin//images/. (/images/ /css/) .
:
Disallow: /css/
Disallow: /cgi-bin/
Disallow: /images/
DOS:
- robots.txt DOS. , - - , . robots.txt UNIX ASCII. FTP- DOS- UNIX-. .
:
, :
Disallow: /cgi-bin/ #this bans robots from our cgi-bin
, . , ? .
:
Disallow: /cgi-bin/
, . -, ?
404:
, - 404 ( ) . - . , robots.txt , html- - . , ? , html-, robots.txt. , robots.txt -.
:
slurp, ?
User-agent: *
Disallow: /
#
User-agent: slurp
Disallow:
, slurp . - slurp? , . slurp , .
- :
USER-AGENT: EXCITE
DISALLOW:
, robots.txt, - . User Disallow.
- :
Disallow: /AL/Alabama.html
Disallow: /AL/AR.html
Disallow: /Az/AZ.html
Disallow: /Az/bali.html
Disallow: /Az/bed-breakfast.html
:
Disallow: /AL
Disallow: /Az
, , . , , . robots.txt, 400 , 4000 ! , -, , .
Disallow!
Allow, Disallow. :
User-agent: Spot
Disallow: /john/
allow: /jane/
:
User-agent: Spot
Disallow: /john/
Disallow:
:
- :
User-agent: Spot
Disallow: john
"john" john". , , , .
, robots.txt ( - ?).
robots.txt, html-. , FrontPage robots.txt .
robots.txt - ? , - , .
, robots.txt , . :
http://www.mydomain.com/robots.txt
.
Google:
Google - , . .
User-agent: googlebot
Disallow: *.cgi
user-agent "googlebot". -.
- robots
robots , , , . , , . .
, , robots.txt.
, Inktomi , - robots. Inktomi "index,follow".
- Robots
robots html-. ( ):
<HTML>
<HEAD>
<META NAME="ROBOTS" CONTENT="NOINDEX, NOFOLLOW">
<META NAME="DESCRIPTION" CONTENT=" .">
<TITLE>...</TITLE>
</HEAD>
<BODY>
- robots
- . content :
index, noindex, follow, nofollow
, .
:
INDEX , .
FOLLOW , , . , , , INDEX FOLLOW. Inktomi. Inktomi "index, nofollow".
, :
= INDEX, FOLLOW
= NOINDEX,NOFOLLOW
- robots:
<META NAME="ROBOTS" CONTENT="NOINDEX, FOLLOW">
<META NAME="ROBOTS" CONTENT="INDEX, NOFOLLOW">
<META NAME="ROBOTS" CONTENT="NOINDEX, NOFOLLOW">