Python之美第十次作业

2023年04月22日,星期六,阴,北京市北京师范大学。今天完成了第十次作业,同样还是在vim上完成,然后贴到Jupyter上,插入图片展示作业。同时今天学会了vim整块代码移动的技巧:第一个方法是可视模式,比如选中前十行,然后按shift+>右移,按shift+<左移;第二个方法是命令模式,使用:m,n>将第m行到第n行右移一个shiftwidth,使用:m,n<将第m行到第n行左移一个shiftwidth,其中shiftwidth=4可以在/etc/vimrc中设置。

Python爬取bnu网站数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
# Author: 冯振华
# Version: 1.0
# Date: 2023年 04月 22日 星期六 13:22:34 CST
# Copyright © 2023 feng <feng@arch>
# Distributed under terms of the MIT license.
#
# 导入库
import os
import requests
from bs4 import BeautifulSoup
from requests.compat import urljoin

# Output folder; one sub-folder per article is created inside it.
dirname = './头条关注'
# Browser-like User-Agent sent with every request.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
# Index page that lists the headline articles.
url = 'http://news.bnu.edu.cn/zx/ttgz/index.htm'
# Only the first MAX_ARTICLES entries of the index are downloaded.
MAX_ARTICLES = 3
# Seconds to wait for the server before giving up on a request.
TIMEOUT = 10
# Characters that are not allowed in file names on common filesystems are
# replaced by '_' before a title is used as a folder or file name.
_UNSAFE_CHARS = str.maketrans({c: '_' for c in '\\/:*?"<>|'})


def _fetch(link):
    """Download ``link`` and return the response.

    Raises:
        requests.RequestException: on connection problems, timeouts, or an
            HTTP error status (so an error page is never saved as content).
    """
    resp = requests.get(link, headers=header, timeout=TIMEOUT)
    resp.raise_for_status()
    return resp


def _save_text(soup, title, name, namepath):
    """Write the article title and the text of its <span> tags to a .txt file.

    Args:
        soup: Parsed article page.
        title: Article title as shown on the index page (first line written).
        name: File-system-safe version of the title, used as the file stem.
        namepath: Folder the text file is written to.
    """
    blocks = soup.find_all('div', class_="article")
    if not blocks:
        print("没有发现文章!")
        return
    try:
        # The file is opened once so that several article blocks do not
        # overwrite each other; UTF-8 is explicit because the platform
        # default encoding may not be able to represent Chinese text.
        with open(os.path.join(namepath, name + '.txt'), 'w',
                  encoding='utf-8') as f:
            f.write(title + '\n')
            for block in blocks:
                for span in block.find_all('span'):
                    f.write(span.text.strip() + '\n')
    except OSError as err:
        print("could not write article text:", err)


def _save_images(soup, page_url, name, namepath):
    """Download the first image of every <p> tag into ``namepath``.

    Files are named ``<name><k><suffix>`` where ``k`` counts the images
    saved so far, starting at 1.

    Args:
        soup: Parsed article page.
        page_url: URL of the article page; relative image links are
            resolved against it.
        name: File-system-safe article title used as the file name prefix.
        namepath: Folder the images are written to.
    """
    k = 1
    for paragraph in soup.find_all('p'):
        img = paragraph.find('img')
        src = img.get('src') if img is not None else None
        if not src:
            continue
        img_url = urljoin(page_url, src)
        # Drop any query string before taking the extension so the file
        # name stays valid.
        suffix = os.path.splitext(src.split('?', 1)[0])[1]
        try:
            data = _fetch(img_url).content
            with open(os.path.join(namepath, name + str(k) + suffix),
                      'wb') as f:
                f.write(data)
        except (requests.RequestException, OSError) as err:
            # Best effort: one broken image must not stop the others.
            print("skipped image", img_url, err)
            continue
        k = k + 1


def main():
    """Download text and images of the first MAX_ARTICLES headline articles."""
    os.makedirs(dirname, exist_ok=True)
    # BeautifulSoup receives the raw bytes and detects the encoding itself.
    index = BeautifulSoup(_fetch(url).content, 'lxml')
    for li in index.find_all('li', class_="item-info01")[:MAX_ARTICLES]:
        heading = li.find('h3')
        anchor = li.find('a')
        if heading is None or anchor is None or not anchor.get('href'):
            continue  # entry without a title or a link: nothing to fetch
        title = heading.text.strip()
        name = title.translate(_UNSAFE_CHARS)
        if not name:
            continue
        namepath = os.path.join(dirname, name)
        os.makedirs(namepath, exist_ok=True)
        page_url = urljoin(url, anchor['href'])
        try:
            article = BeautifulSoup(_fetch(page_url).content, 'lxml')
        except requests.RequestException as err:
            print("skipped article", page_url, err)
            continue
        _save_text(article, title, name, namepath)
        _save_images(article, page_url, name, namepath)


if __name__ == "__main__":
    main()
Python展示图片
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
### 为展示图片调用包
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
# Folder holding the screenshots: "<current working directory>/picture/".
path_current = os.getcwd()
picfolder = ''.join((path_current, '/picture/'))
# 定义展示图片函数
def picshow(inpath, inpic):
    """Display one image file in a matplotlib figure with the axes hidden.

    Args:
        inpath: Directory that contains the image.
        inpic: File name of the image inside ``inpath``.
    """
    # os.path.join accepts the directory with or without a trailing
    # separator; the context manager closes the image file after drawing.
    with Image.open(os.path.join(inpath, inpic)) as img:
        plt.imshow(img, interpolation="bilinear")
    plt.axis("off")
    plt.show()
# Show the result screenshots of this assignment, in order.
for picname in ('01.png', '02.png', '03.png', '04.png', '05.png'):
    picshow(picfolder, picname)