Python之美第十次作业

发表于 2023-04-22 更新于 2024-11-30

2023年04月22日，星期六，阴，北京市北京师范大学。今天完成了第十次作业，同样还是在vim上完成，然后贴到Jupyter上，并插入图片展示作业。同时今天学到了在vim中整块移动代码的技巧：第一个方法是可视模式，比如选中前十行，然后按 >（即Shift+.）右移，按 <（即Shift+,）左移；第二个方法是命令模式，使用 :m,n> 将第m行到第n行右移一个shiftwidth，使用 :m,n< 将第m行到第n行左移一个shiftwidth。其中shiftwidth=4可以在/etc/vimrc中设置。

Python爬取bnu网站数据

#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
# Author: 冯振华
# Version: 1.0
# Date: 2023年 04月 22日 星期六 13:22:34 CST
# Copyright © 2023 feng <feng@arch>
# Distributed under terms of the MIT license.
#
# 导入库
import os
import requests
from bs4 import BeautifulSoup
from requests.compat import urljoin

# Scrape the first three articles listed on the BNU news index page: for each
# one, save its text to "<title>/<title>.txt" and its images to
# "<title>/<title><k><ext>" under the output folder.

# Output folder for the scraped articles.
dirname = './头条关注'
os.makedirs(dirname, exist_ok=True)  # no-op when the folder already exists
# Browser-like User-Agent sent with every request.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
url = 'http://news.bnu.edu.cn/zx/ttgz/index.htm'
# Seconds to wait for a single HTTP request, so a stalled server cannot hang
# the script forever.
timeout = 30

# Fetch the index page; fail loudly on an HTTP error instead of parsing an
# error page and silently finding no articles.
html = requests.get(url, headers=header, timeout=timeout)
html.raise_for_status()
html.encoding = 'utf-8'
# Hand the page bytes to BeautifulSoup for parsing.
bsObj = BeautifulSoup(html.content, 'lxml')
lis = bsObj.find_all('li', class_="item-info01")

for li in lis[:3]:  # only the first three articles are processed
    title_tag = li.find('h3')
    link_tag = li.find('a')
    # Skip list items that lack a title or a link rather than crashing.
    if title_tag is None or link_tag is None or not link_tag.get('href'):
        continue
    name = title_tag.text.strip()
    # A path separator inside the title would be read as a sub-directory.
    safe_name = name.replace('/', '_').replace('\\', '_')
    namepath = dirname + '/' + safe_name
    os.makedirs(namepath, exist_ok=True)

    url2 = urljoin(url, link_tag['href'])
    try:
        html2 = requests.get(url2, headers=header, timeout=timeout)
        html2.raise_for_status()
    except requests.RequestException as err:
        # One unreachable article must not abort the remaining ones.
        print("没有发现文章!", url2, err)
        continue
    bsObj2 = BeautifulSoup(html2.content, 'lxml')

    ## Save the article text
    lis2 = bsObj2.find_all('div', class_="article")
    if lis2:
        # Open the file once so several article <div>s do not overwrite each
        # other, and pin the encoding so Chinese text is written correctly
        # regardless of the platform's default encoding.
        with open(namepath + '/' + safe_name + '.txt', 'w', encoding='utf-8') as f:
            f.write(name + '\n')
            for para in lis2:
                for parasub in para.find_all('span'):
                    f.write(parasub.text.strip() + '\n')
    else:
        print("没有发现文章!")

    ## Save the article images
    k = 1  # running index appended to each saved image's file name
    for picls in bsObj2.find_all('p'):
        img = picls.find('img')
        if img is None or not img.get('src'):
            continue  # paragraph without a usable image
        imglink = img['src']
        # Relative image links are relative to the article page, not the index.
        url3 = urljoin(url2, imglink)
        try:
            html3 = requests.get(url3, headers=header, timeout=timeout)
            html3.raise_for_status()
        except requests.RequestException as err:
            print("图片下载失败:", url3, err)
            continue
        # Take the extension from the path part only, ignoring any query string.
        ext = os.path.splitext(imglink.split('?', 1)[0])[1]
        with open(namepath + '/' + safe_name + str(k) + ext, 'wb') as f:
            f.write(html3.content)
        k = k + 1

Python展示图片

### 为展示图片调用包
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
# Resolve the picture folder relative to the current working directory.
path_current = os.getcwd()
picfolder = ''.join((path_current, '/picture/'))
# Define the image display function
def picshow(inpath,inpic):
    """Display one image file in a matplotlib figure with the axes hidden.

    Args:
        inpath: Directory prefix. It is concatenated directly with ``inpic``,
            so it must already end with a path separator.
        inpic: Image file name appended to ``inpath``.
    """
    # The context manager releases the image file as soon as it has been drawn.
    with Image.open(inpath + inpic) as img:
        plt.imshow(img, interpolation="bilinear")
    plt.axis("off")
    plt.show()
# Show the result screenshots of this assignment, in order.
for picname in ('01.png', '02.png', '03.png', '04.png', '05.png'):
    picshow(picfolder, picname)