用Python爬了下知乎的“沙雕”问题,我们得到了一个15强排行榜



import re
import time

import pandas as pd
import requests
import selenium
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

# Step 1: load a seed Zhihu question in Chrome, scroll to pull in more
# answers, and harvest every "question/<id>" path mentioned in those answers.
driver = webdriver.Chrome()
driver.maximize_window()
url = 'https://www.zhihu.com/question/37453271'

# Open the seed page in a new window, close the window the driver started
# with, then continue in the first remaining window handle.
js = 'window.open("' + url + '")'
driver.execute_script(js)
driver.close()
driver.switch_to.window(driver.window_handles[0])

# Jump the scroll position far past the end of the page 100 times so that
# additional answers get a chance to load.
# NOTE(review): there is no pause between scrolls; if answers are loaded
# lazily over the network a short sleep per iteration may be needed.
js = "var q=document.documentElement.scrollTop=10000000"
for i in range(100):
    driver.execute_script(js)

# Concatenate the inner HTML of every element carrying the AnswerItem class.
all_html = [
    k.get_property('innerHTML')
    for k in driver.find_elements(By.CLASS_NAME, 'AnswerItem')
]
all_text = ''.join(all_html)
# (The original also carried a disabled replacement of 'u002F' with '/';
# it stays disabled here.)
# Normalise "questions/<id>" to "question/<id>" so one pattern covers both.
all_text = all_text.replace('questions', 'question')

# Raw string with \d+ so that the numeric question id is actually matched.
pat = r'question/\d+'
# De-duplicate; the resulting order is arbitrary.
questions = list(set(re.findall(pat, all_text)))

# Step 2: fetch every harvested question page with requests and collect its
# title, view count, follower count, answer count and open/closed state into
# the DataFrame ``questions_df``.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win32; x32; rv:54.0) Gecko/20100101 Firefox/54.0',
    'Connection': 'keep-alive',
}
cookies = 'v=3; iuuid=1A6E888B4A4B29B16FBA1299108DBE9CDCB327A9713C232B36E4DB4FF222CF03; webp=true; ci=1%2C%E5%8C%97%E4%BA%AC; __guid=26581345.3954606544145667000.1530879049181.8303; _lxsdk_cuid=1646f808301c8-0a4e19f5421593-5d4e211f-100200-1646f808302c8; _lxsdk=1A6E888B4A4B29B16FBA1299108DBE9CDCB327A9713C232B36E4DB4FF222CF03; monitor_count=1; _lxsdk_s=16472ee89ec-de2-f91-ed0%7C%7C5; __mta=189118996.1530879050545.1530936763555.1530937843742.18'

# Turn the "name=value; name=value" cookie header into a dict. Each pair
# (``line``) is split individually; splitting the whole ``cookies`` string
# here would only ever produce the first cookie.
cookie = {}
for line in cookies.split(';'):
    name, value = line.strip().split('=', 1)
    cookie[name] = value


def _extract_count(key, text):
    """Return the integer that follows ``"<key>":`` in *text*.

    Raises:
        IndexError: if the key is not followed by digits anywhere in *text*.
    """
    return int(re.findall('"' + key + r'":(\d+)', text)[0])


rows = []
for i, question in enumerate(questions):
    try:
        url = 'https://www.zhihu.com/' + question
        # A timeout keeps one stalled connection from hanging the whole run.
        html = requests.get(url, cookies=cookie, headers=header, timeout=30).content
        bsObj = BeautifulSoup(html.decode('utf-8'), "html.parser")
        text = str(bsObj)
        title = bsObj.find('h1', attrs={'class': 'QuestionHeader-title'}).text
        visit = _extract_count('visitCount', text)
        follower = _extract_count('followerCount', text)
        answer = _extract_count('answerCount', text)
        # 1 when the "question closed" banner text is absent, else 0.
        is_open = int('问题已关闭' not in text)
        rows.append({
            'title': title,
            'visit': visit,
            'follower': follower,
            'answer': answer,
            'is_open': is_open,
        })
        # Pause between successful requests.
        time.sleep(2)
        print(i)
    except (requests.RequestException, AttributeError, IndexError, ValueError):
        # Network failure, missing title element, missing counter or
        # undecodable body: report the index and move on to the next question.
        print('错误' + str(i))

# Build the frame once from the collected rows (DataFrame.append no longer
# exists in pandas 2.x, and appending row by row copies the frame each time).
questions_df = pd.DataFrame(
    rows, columns=['title', 'visit', 'follower', 'answer', 'is_open']
)






https://www.zhihu.com/question/276876453




https://www.zhihu.com/question/37160000

https://www.zhihu.com/question/36805199







https://www.zhihu.com/question/52083864

https://www.zhihu.com/question/24393996


https://www.zhihu.com/question/21370688/

https://www.zhihu.com/question/20831390/


实习/全职编辑记者招聘ing
加入我们,亲身体验一家专业科技媒体采写的每个细节,在最有前景的行业,和一群遍布全球最优秀的人一起成长。坐标北京·清华东门,在大数据文摘主页对话页回复“招聘”了解详情。简历请直接发送至zz@bigdatadigest.cn


关注公众号:拾黑(shiheibook)了解更多
[广告]赞助链接:
四季很好,只要有你,文娱排行榜:https://www.yaopaiming.com/
让资讯触达的更精准有趣:https://www.0xu.cn/
关注网络尖刀微信公众号随时掌握互联网精彩
赞助链接
排名
热点
搜索指数
- 1 建设人民城市 致广大而尽精微 7904323
- 2 香港大埔火灾已致75人遇难 7808409
- 3 王毅:日本现职领导人公然开历史倒车 7713074
- 4 一图区分普通感冒和流感 7615866
- 5 国防部回应中国是否正建造核动力航母 7519925
- 6 李家超:香港大埔火灾已全部受控 7428986
- 7 日本朝日集团道歉:大批客户信息泄露 7334109
- 8 贵州榕江群众为香港火灾受灾者募捐 7234690
- 9 日本维新会:或退出执政联盟 7137727
- 10 国防部回应“演习是否意在警告日本” 7047845







大数据文摘
