欢迎光临
感谢一路有你

Python requests 模块

如果获取不到 sign 和 token,可以改为请求手机端接口进行测试

百度翻译(案例)

import requests

# Post a translation query to Baidu's /basetrans endpoint and print the raw
# response body. The request identifies itself with an Android Chrome
# user-agent string.
header = {
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Mobile Safari/537.36'}
data = {
    'from': 'zh',
    'to': 'en',
    'query': '王明昌博客',
}

post_url = "http://fanyi.baidu.com/basetrans"
# Without a timeout, requests waits indefinitely on an unresponsive server.
r = requests.post(post_url, data=data, headers=header, timeout=10)
print(r.content.decode())

获取简书个人中心文章

import requests
from lxml import etree
import os
# Scrape pages 1-3 of one Jianshu user's article list and print one dict per
# article with the keys title, time, url, abstract and img.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'}


def _first(values, default=''):
    """Return the first XPath result, or *default* when nothing matched."""
    return values[0] if values else default


for i in range(1, 4):
    url_ = 'https://www.jianshu.com/u/4642b9fae22c?order_by=shared_at&page={}'.format(i)
    # A timeout keeps one stalled page from hanging the whole loop.
    res = requests.get(url_, headers=headers, timeout=10)
    res = etree.HTML(res.content.decode())
    nodes = res.xpath('//ul[@class="note-list"]/li')
    for node in nodes:
        # Every lookup goes through _first() so an entry that lacks a field
        # yields '' instead of raising IndexError and aborting the scrape.
        title = _first(node.xpath('.//a[@class="title"]/text()'))
        shared_at = _first(node.xpath('.//span[@class="time"]/@data-shared-at'))
        abstract = _first(node.xpath('.//p[@class="abstract"]/text()')).strip()
        # NOTE(review): assumes the thumbnail address lives in the img tag's
        # src attribute - confirm against the live markup.
        img = _first(node.xpath('.//img[@class="  img-blur-done"]/@src'))
        url = 'https://www.jianshu.com' + _first(node.xpath('.//a/@href'))
        item = {
            'title': title,
            'time': shared_at,
            'url': url,
            # The abstract and img slots previously received the title and
            # the timestamp by mistake.
            'abstract': abstract,
            'img': img,
        }
        print(item)

下载图片

#_*_coding:utf-8_*_
import requests
import re
import os
class GetImage(object):
    """Download every image referenced by an ``<img ... src="...">`` tag on a page.

    Images are written to an ``imgs`` directory located next to this script.
    """

    def __init__(self, url):
        """Store the page URL and make sure the output directory exists.

        Args:
            url: Address of the HTML page whose images should be downloaded.
        """
        self.url = url
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'
        }
        self.dir_path = os.path.dirname(os.path.abspath(__file__))
        self.path = self.dir_path+'/imgs'
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.path, exist_ok=True)

    def download(self, url):
        """GET *url* and return the response, or ``None`` when the request fails.

        Args:
            url: Address to fetch.

        Returns:
            The ``requests`` response, or ``None`` after a request error.
        """
        try:
            return requests.get(url, headers=self.headers, timeout=10)
        except requests.RequestException as E:
            # str() is required here: concatenating the exception object
            # itself raises TypeError and would hide the real failure.
            print(url+'下载失败,原因:'+str(E))
            return None

    def parse(self, res):
        """Extract the image URLs from a downloaded page.

        Args:
            res: Response-like object whose ``content`` holds the page bytes.

        Returns:
            List of image URLs. Relative ``src`` values are resolved against
            ``self.url``; absolute ones are returned unchanged.
        """
        from urllib.parse import urljoin

        # Undecodable bytes are replaced rather than aborting the whole page.
        content = res.content.decode(errors='replace')
        img_list = re.findall(r'<img.*?src="(.*?)"', content, re.S)
        return [urljoin(self.url, src) for src in img_list]

    def save(self, res_img, file_name):
        """Write the downloaded image body to *file_name*.

        Args:
            res_img: Response-like object with the image bytes in ``content``;
                a falsy value (for example ``None`` from a failed download)
                is skipped.
            file_name: Destination path of the image file.
        """
        if res_img:
            with open(file_name, 'wb') as f:
                f.write(res_img.content)
            # Report the saved path; this method has no URL of its own.
            print(file_name+'下载成功')

    def run(self):
        """Download the page, then download and save each image it references."""
        from urllib.parse import urlsplit

        res = self.download(self.url)
        if res is None:
            # The page itself could not be fetched, so there is nothing to parse.
            return
        for url in self.parse(res):
            # Use only the URL path so a query string never ends up in the
            # file name; skip URLs that do not end in a file name.
            name = os.path.basename(urlsplit(url.strip()).path)
            if not name:
                continue
            res_img = self.download(url)
            file_name = self.path+'/'+name
            self.save(res_img, file_name)

if __name__ == '__main__':
    # Pages under https://www.yangqq.com/skin/jxhx/ to harvest images from.
    page_urls = (
        'https://www.yangqq.com/skin/jxhx/',
        'https://www.yangqq.com/skin/jxhx/list.html',
        'https://www.yangqq.com/skin/jxhx/share.html',
        'https://www.yangqq.com/skin/jxhx/list2.html',
        'https://www.yangqq.com/skin/jxhx/list3.html',
        'https://www.yangqq.com/skin/jxhx/daohang.html',
        'https://www.yangqq.com/skin/jxhx/about.html',
    )
    # Each page is handled by its own GetImage instance.
    for url in page_urls:
        GetImage(url).run()

获取《哪吒之魔童降世》短评,并保存为 CSV

#_*_coding:utf-8_*_
import requests
import re
import csv
import time
from lxml import etree

def get_one_page(url):
    """Fetch *url* and return the response only when the server answers 200.

    Args:
        url: Address of the page to download.

    Returns:
        The ``requests`` response on HTTP 200; ``None`` for any other status
        or when the request itself fails.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36'}
    try:
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as e:
        # requests signals connection errors and timeouts with
        # RequestException subclasses; EOFError is never raised here.
        print(e)
        return None
    if response.status_code == 200:
        return response
    return None


def _first_text(node, path):
    """Return the stripped first XPath match under *node*, or '' if none."""
    found = node.xpath(path)
    return found[0].strip() if found else ''


def parse_one_page(res, info=None):
    """Extract the short comments from one downloaded page.

    Args:
        res: Response whose ``content`` holds the page bytes, or ``None``
            when the download failed.
        info: Unused; kept so existing two-argument calls keep working.

    Returns:
        List of dicts with the keys ``User``, ``Time`` and ``Comment``; an
        empty list when *res* is ``None``.
    """
    if res is None:
        return []
    tree = etree.HTML(res.content.decode())
    comments = []
    for node in tree.xpath('//div[@class="comment-item"]'):
        # A missing field becomes '' instead of raising IndexError, so one
        # incomplete comment cannot abort the whole page.
        # NOTE(review): Time is taken from the third span inside comment-info;
        # confirm that position holds for every comment layout.
        comic = {
            'User': _first_text(node, './/span[@class="comment-info"]/a/text()'),
            'Time': _first_text(node, './/span[@class="comment-info"]/span[3]/text()'),
            'Comment': _first_text(node, './/span[@class="short"]/text()'),
        }
        print(comic)
        comments.append(comic)
    return comments


def write_to_file(info):
    """Append comment rows to the CSV file, writing the header row only once.

    Args:
        info: Iterable of dicts keyed by ``User``, ``Time`` and ``Comment``.
            ``None`` or an empty sequence appends no rows.
    """
    fieldnames = ['User', 'Time', 'Comment']
    # The encoding is fixed so the Chinese text does not depend on the
    # platform's default codec.
    with open('《哪吒之魔童降世》短评.csv', 'a', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        # Append mode opens at the end of the file, so position 0 means the
        # file is empty and still needs its header.
        if f.tell() == 0:
            writer.writeheader()
        try:
            writer.writerows(info or [])
        except (csv.Error, ValueError) as exc:
            # Stay best-effort, but report the failure instead of hiding it.
            print('写入CSV失败,原因:' + str(exc))


def main(start):
    """Scrape one page of Douban short comments and append it to the CSV file.

    Args:
        start: Offset of the first comment, sent as the ``start`` query
            parameter.
    """
    url = 'https://movie.douban.com/subject/26794435/comments?start=' + str(start) + '&limit=20&sort=new_score&status=P&percent_type='
    html = get_one_page(url)
    if html is None:
        # get_one_page returns None when the page could not be used; skip
        # this page instead of crashing in the parser.
        print('页面获取失败,已跳过:' + url)
        return
    # The second argument is a placeholder; the parser builds its own list.
    data = parse_one_page(html, [])
    write_to_file(data)


if __name__ == '__main__':
    # Visit ten pages, using the comment offsets 0, 20, ..., 180.
    for offset in range(0, 200, 20):
        main(offset)
        # Progress marker printed after each page.
        print('第{}本页采集完毕。'.format(str(offset // 20)))
        # Pause one second between pages.
        time.sleep(1)

代理

# Send a request through an HTTP proxy.
proxies = {"http":"http://27.152.90.200:80"}
header = {}
# The module is named `requests`; the previous `request.get` call raised
# NameError. The timeout keeps an unresponsive proxy from hanging the script.
requests.get("http://www.baidu.com",proxies=proxies,headers=header,timeout=10)

模拟登录

赞(0) 打赏
未经允许不得转载:王明昌博客 » python request模块
分享到: 更多 (0)

觉得文章有用就打赏一下文章作者

支付宝扫一扫打赏

微信扫一扫打赏

隐藏
变装