網頁爬蟲 - 為什麼用 Python 模擬登入 App Annie 時一直返回 503 狀態碼
問題描述
# -*- encoding: utf-8 -*-
"""Log in to App Annie with a requests session and print a top-chart page."""
import sys

import requests
import xlwt
from bs4 import BeautifulSoup

if sys.version_info[0] == 2:
    # Python 2 only: switch the implicit str/unicode codec to UTF-8 so the
    # parsed page can be printed. Neither call exists (or is needed) on
    # Python 3.
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')

referer = 'https://www.appannie.com/account/login/?_ref=header'
user_agent = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36')
header = {
    'User-Agent': user_agent,
    'Referer': referer,
    'Host': 'www.appannie.com',
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain,*/*',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'X-NewRelic-ID': 'VwcPUFJXGwEBUlJSDgc=',
    'X-Requested-With': 'XMLHttpRequest',
}


def main():
    """Log in, then download and print the Google Play top-chart page.

    Raises:
        KeyError: if the login page did not set a ``csrftoken`` cookie.
        Exception: if the login POST does not answer with a 2xx status code.
    """
    url = 'https://www.appannie.com/account/login/'
    # content = requests.get(url,headers = header).content
    # soup = BeautifulSoup(content,'lxml')
    # key = soup.select()
    s = requests.Session()
    # Install the browser-like headers on the session itself so that EVERY
    # request carries them. Previously they were passed only to the GET
    # calls, so the login POST was sent without User-Agent and Referer.
    # NOTE(review): that is presumably what triggered the 503 - confirm
    # against the live site.
    s.headers.update(header)

    # The first GET is only needed for its side effect: the server is
    # expected to set the csrftoken cookie on the session.
    s.get(url)
    key = s.cookies['csrftoken']
    data = {
        'csrfmiddlewaretoken': key,
        'next': '/dashboard/home/',
        'username': '[email protected]',
        'password': 'xxxxx',
    }
    req = s.post(url, data=data)
    # Floor division keeps the "2xx" test correct on Python 3, where "/"
    # would yield a float such as 2.0 or 5.03.
    if req.status_code // 100 != 2:
        raise Exception('Error while logging in, code: %d' % (req.status_code))

    n = '2017-04-11'
    url_1 = ('https://www.appannie.com/apps/google-play/top-chart/'
             '?country=US&category=game&device=&date={}'.format(n))
    # The session already holds the login cookies, so they do not need to be
    # passed again explicitly.
    req_1 = s.get(url_1).content
    # print req_1
    soup = BeautifulSoup(req_1, 'lxml')
    print(soup)
    # ids = soup.find_all('span')
    # for id in ids :
    #     name = id.get('title')
    #     print name


if __name__ == '__main__':
    main()
問題解答
回答1:兩個關鍵點:1. headers 的 user-agent;2. csrfmiddlewaretoken 參數。
# coding: utf-8
"""Minimal App Annie login: fetch the CSRF token, then POST the credentials."""
import requests

url = 'https://www.appannie.com/account/login'

session = requests.Session()
# Set the User-Agent once on the session so that both the GET and the POST
# below are sent with it.
session.headers['user-agent'] = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
)

# This GET is issued for its side effect: the server is expected to store the
# csrftoken cookie on the session. ``.get`` returns None if it did not.
session.get(url)
token = session.cookies.get('csrftoken')

data = {
    'csrfmiddlewaretoken': token,
    'next': '/dashboard/home/',
    'username': 'XXXX',
    'password': 'XXXX',
}
r = session.post(url, data)
# Parenthesised form prints the same value on Python 2 and Python 3.
print(r.status_code)
相關文章:
