URL 中的字符串是 base64 编码的,将示例中的字符串解码后得到:/JRA3Q/Clim9120/Daily/anl_isentrop/anl_isentrop_bvf2.clim9120.day0101
根据这些信息,你可以很容易地推断出,对于给定的数据集,字符串中的哪些部分需要替换。然后,使用下面提供的下载脚本:
#!/usr/bin/env python
"""Download a file from the DIAS data server after logging in through CAS.

Edit the arguments of the ``access.dl(...)`` call in ``main()`` to select the
file to fetch.  The last URL component is the base64 encoding of the remote
path (directory + file name).

The script runs on both Python 2 and Python 3.
"""

import getpass
import netrc
import optparse
import os
import subprocess
import sys

try:  # Python 3 module layout
    from html.parser import HTMLParser
    from http.cookiejar import MozillaCookieJar
    from urllib.error import HTTPError
    from urllib.parse import urlencode, urljoin
    from urllib.request import HTTPCookieProcessor, build_opener
except ImportError:  # Python 2 module layout
    from HTMLParser import HTMLParser
    from cookielib import MozillaCookieJar
    from urllib import urlencode
    from urllib2 import HTTPCookieProcessor, HTTPError, build_opener
    from urlparse import urljoin


class CASLoginParser(HTMLParser):
    """Collect the form target and pre-filled inputs of an HTML login page.

    After ``feed()``:

    * ``action`` holds the ``action`` attribute of the last ``<form>`` tag
      that had one, or ``None`` if no such tag was seen.
    * ``data`` maps ``name`` -> ``value`` for every ``<input>`` tag that
      carries both attributes.
    """

    def __init__(self):
        # Explicit base-class call: HTMLParser is an old-style class on
        # Python 2, so super() cannot be used there.
        HTMLParser.__init__(self)
        self.action = None
        self.data = {}

    def handle_starttag(self, tagname, attribute):
        """Record form actions and named input values as tags are parsed."""
        tag = tagname.lower()
        if tag == 'form':
            attrs = dict(attribute)
            if 'action' in attrs:
                self.action = attrs['action']
        elif tag == 'input':
            attrs = dict(attribute)
            if 'name' in attrs and 'value' in attrs:
                self.data[attrs['name']] = attrs['value']


class DIASAccess(object):
    """HTTP client that keeps cookies and logs in to CAS when redirected."""

    def __init__(self, username, password):
        """Store the credentials and build a cookie-aware URL opener."""
        self.__cas_url = 'https://auth.diasjp.net/cas/login?'
        self.__username = username
        self.__password = password
        self.__cj = MozillaCookieJar()
        self.__opener = build_opener(HTTPCookieProcessor(self.__cj))

    def open(self, url, data=None):
        """Open *url*, logging in first if the server redirects to CAS.

        *data*, when given, is passed to the opener as the request body.
        Returns the response object of the last request made.
        """
        response = self.__opener.open(url, data)
        response_url = response.geturl()

        if response_url != url and response_url.startswith(self.__cas_url):
            # Redirected to the CAS login page.
            response = self.__login_cas(response)
            if data is not None:
                # The request carried a body (POST), so it has to be sent
                # again now that the session is authenticated.
                response.close()
                response = self.__opener.open(url, data)

        return response

    def __login_cas(self, response):
        """Submit the login form contained in *response*.

        Raises ``LoginError`` if the page contains no form action.  Exits the
        process with status 1 if the final URL after submitting equals the
        form's action URL, which this script treats as a rejected login.
        """
        page = response.read()
        if not isinstance(page, str):
            # Python 3 yields bytes; HTMLParser.feed() needs text.
            page = page.decode('utf-8', 'replace')

        parser = CASLoginParser()
        parser.feed(page)
        parser.close()

        if parser.action is None:
            raise LoginError('Not login page')

        action_url = urljoin(response.geturl(), parser.action)
        data = parser.data
        data['username'] = self.__username
        data['password'] = self.__password

        response.close()
        body = urlencode(data)
        if not isinstance(body, bytes):
            # Python 3 requires the request body to be bytes.
            body = body.encode('ascii')
        response = self.__opener.open(action_url, body)

        if response.geturl() == action_url:
            print('Authorization fail')
            sys.exit(1)

        return response

    def dl(self, url, path, file, data=None):
        """Download *url* to ``'.' + path + file`` in 8 KiB chunks.

        The target directory ``'.' + path`` is created if it does not exist.
        Prints ``<path><file> OK`` and returns the (fully read) response on
        success.  On an HTTP error prints ``<path><file> NG`` and returns
        ``None``.
        """
        block_size = 8192
        try:
            response = self.__opener.open(url, data)
            target_dir = '.' + path
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)

            # The context manager guarantees the file is flushed and closed
            # even if reading from the network fails part-way through.
            with open(target_dir + file, 'wb') as out:
                while True:
                    chunk = response.read(block_size)
                    if not chunk:
                        break
                    out.write(chunk)

            print(path + file + " OK")
            return response
        except HTTPError:
            print(path + file + " NG")
            return None


class LoginError(Exception):
    """Raised when a page expected to be the CAS login form is not one."""

    def __init__(self, e):
        Exception.__init__(self, e)


def main():
    """Read credentials, authenticate against DIAS and download one file."""
    host = 'data.diasjp.net'
    usage = '''usage: %prog [options]'''
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-n', '--netrc', default=None,
                      help='specify the netrc file', metavar='FILE')
    parser.add_option('-u', '--user', default=None,
                      help='specify the DIAS account name',
                      metavar='USERNAME')
    (options, args) = parser.parse_args()

    (login, password) = (None, None)
    try:
        auth = netrc.netrc(options.netrc).authenticators(host)
        if auth is not None:
            (login, account, password) = auth
    except IOError:
        # An unreadable or missing netrc file is not an error: fall back to
        # prompting below.
        pass

    if options.user is not None:
        # An explicit user overrides netrc, so its password must be asked.
        login = options.user
        password = None

    if login is None:
        try:
            prompt = raw_input  # Python 2
        except NameError:
            prompt = input  # Python 3
        login = prompt('Username: ')
    if password is None:
        password = getpass.getpass('Password: ')

    access = DIASAccess(login, password)

    # Opening the file list first triggers the CAS login and sets cookies.
    targeturl = 'https://data.diasjp.net/dl/storages/filelist/dataset:645'
    response = access.open(targeturl)
    response.close()

    access.dl('https://data.diasjp.net/dl/storages/downloadCmd/L0pSQTNRL0NsaW05MTIwL0RhaWx5L2FubF9pc2VudHJvcC9hbmxfaXNlbnRyb3BfYnZmMi5jbGltOTEyMC5kYXkwMTAx',
              '/JRA3Q/Clim9120/Daily/anl_isentrop/',
              'anl_isentrop_bvf2.clim9120.day0101')


if __name__ == '__main__':
    main()
并适当地替换 access.dl 的参数。