hax01tips
注意:本文之后极有可能设为私有
題目

hax01
Your mission is the following: Simply enter a URL into the box. The domain of the URL must be or end with 'nasa.gov'. The URL will be fetched right away. The content returned should contain the string: "2200178118" in the first 10 Kbytes of data. 404/403/etc error pages are not accepted. Remember, do not do anything illegal. Make sure you type the right URL, do not guess.

Hint: google is your friend.
http://google.com/search?q=site:nasa.gov


当时我的思路是找出所有以nasa.gov结尾的域名,然后遍历这些网址。之后我真的写了个python程序,取了google检索出来的前1000个页面,取出域名,保存起来,去除重复的有500多个。接着,读取html页面,判断是否含有字符串。其间,遇到了个网速的问题,超时后经常会跑到电信的114搜索上去。验证了170多个页面后,我发现自己理解错题目了,这里的要求并不是指URL以nasa.gov结尾,而是指URL的域名以nasa.gov结尾。我无语了,这相当于该域名下的所有网页都有可能。这个工作量巨大得几乎是不可能的。暂时中止。
以下代码可供参考,修改了n次,可能现在已经没法直接运行。

Python 2.5 版本:
1
from urllib import FancyURLopener
2
import urllib2
3
import sys
4
import re
5
import locale
6
"""
7
class MyOpener(FancyURLopener):
8
version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11)Gecko/20071127 Firefox/2.0.0.11'
9
10
res = re.compile(r'(([a-zA-Z]+\.)+nasa.gov)')
11
12
myopener = MyOpener()
13
url = 'http://www.google.co.jp/search?&num=100&as_qdr=all&as_occt=any&as_sitesearch=nasa.gov'
14
li = []
15
for i in range(0, 10):
16
url = url + '&start=' + str(i*100)
17
page = myopener.open(url)
18
str1 = page.read()
19
for aItem in res.findall(str1):
20
if not aItem[0] in li:
21
li.append(aItem[0])
22
"""
23
with open('nasa.txt') as li:
24
#li = open('nasa.txt')
25
#print li.count
26
m = 0
27
for a in li:
28
#print 'http://'+a
29
m = m + 1
30
print m
31
url = a
32
req = urllib2.Request(url)
33
try:
34
response = urllib2.urlopen(req)
35
the_page = response.read()
36
with open(url + '.txt') as nasa:
37
write(the_page)
38
if the_page.find(r'daohang.118114.cn') <> -1 :
39
print '114'
40
elif the_page.find('2200178118', 0, 10240) <> -1 :
41
print url
42
else :
43
print '
'
44
except urllib2.URLError, e:
45
print e.reason
46
47
"""
48
#gUrl = 'http://www.google.co.jp/search?hl=ja&source=hp&q=site%3Anasa.gov&lr=&aq=f&oq='
49
#google = urllib.urlopen(gUrl)
50
#str = google.read()
51
for str in open('sitenasa_gov.htm'):
52
for aItem in res.findall(str):
53
print aItem[0]
54
55
#print str
56
str = 'www.xxx.nasa.gov/wwf.nasa.gov'
57
58
"""
59
60
61
#2200178118
62

Python 3.1 版本:
# Python 3.1 version of the same scanner.
# Usage: python scan.py <url-list-file>
# Reads candidate nasa.gov URLs (one per line, 'http://...' form),
# fetches each page, saves a copy, and reports which page contains the
# challenge string "2200178118" within the first 10 KB.
from urllib.request import FancyURLopener
import urllib.request
import urllib.error   # explicit: don't rely on urllib.request's side effect
import sys
import re
import locale

"""
# Disabled stage 1: scrape Google result pages for *.nasa.gov hostnames.
class MyOpener(FancyURLopener):
    # Pose as Firefox so Google serves a normal result page.
    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11)Gecko/20071127 Firefox/2.0.0.11'

# The dot before 'gov' must be escaped, otherwise it matches any character.
res = re.compile(r'(([a-zA-Z]+\.)+nasa\.gov)')

myopener = MyOpener()
base = 'http://www.google.co.jp/search?&num=100&as_qdr=all&as_occt=any&as_sitesearch=nasa.gov'
li = []
for i in range(0, 10):
    url = base + '&start=' + str(i * 100)
    page = myopener.open(url)
    str1 = page.read()
    for aItem in res.findall(str1):
        if not aItem[0] in li:
            li.append(aItem[0])
"""

fiPath = sys.argv[1]
with open(fiPath) as li:
    m = 0
    for a in li:
        m = m + 1
        # print(m)               # progress counter, enable when needed
        url = a.strip()          # strip explicitly instead of relying on
                                 # the trailing newline via url[7:-1]
        req = urllib.request.Request(url)
        try:
            response = urllib.request.urlopen(req)
            the_page = response.read()
            # Keep a raw copy of each fetched page; url[7:] strips the
            # leading 'http://' so the file name is just the host name.
            with open(url[7:] + '.txt', 'wb') as nasa:
                nasa.write(the_page)
            text = the_page.decode('utf8')   # decode once, reuse below
            if text.find(r'icc.qonc.com') != -1:
                # The ISP hijacked a failed DNS lookup to its portal.
                print('114')
            elif text.find('2200178118', 0, 10240) != -1:
                # Target string found in the first 10 KB -- this is the URL.
                print(url)
            else:
                print('\n')
        except urllib.error.URLError as e:
            # Only HTTPError carries .code; a plain URLError (DNS failure,
            # refused connection, ...) only has .reason.
            print(getattr(e, 'code', e.reason))
        except UnicodeDecodeError as UDE:
            print(UDE)

"""
# Disabled alternative: parse hostnames out of a saved Google result page.
#gUrl = 'http://www.google.co.jp/search?hl=ja&source=hp&q=site%3Anasa.gov&lr=&aq=f&oq='
#google = urllib.urlopen(gUrl)
#str = google.read()
for str in open('sitenasa_gov.htm'):
    for aItem in res.findall(str):
        print(aItem[0])

#print str
str = 'www.xxx.nasa.gov/wwf.nasa.gov'
"""

#2200178118
而后,过了大概几个月,变换思路,解决,意外地简单……
事实上,只要向服务器提交数据,一般服务器也会将该数据原样返回到页面上。该题最后的hint并不是让我们去搜该域名,而是告诉我们怎样在google.com的页面上显示出我们想要的数据。譬如
http://www.google.co.jp/search?q=2200178118 该页面的前10K里就应该包含该字符串。接下来,我们只需要在nasa.gov上找一个能提交数据的页面就行了。
over