Python之美第八次作业

2023年04月09日,星期日,晴,北京市北京师范大学。昨天完成了Python之美课程的作业。这次作业对代码进行了函数封装,自己感觉这才有了程序的样子,同时也保留了一些有用的功能。以后使用Python开发软件时有可能会用到,所以在这里发布这次作业的源码。

1. 对第5次和第7次作业进行函数封装,按功能封装成如下函数:

  • 函数1:从txt文件中得到诗歌内容文字列表

  • 函数2:对文字列表做词频、排序以及打印结果

  • 函数3:打印集合操作结果

  • 函数4:词云的生成和绘制

Python HomeWork8
# 调用包
import operator
from collections import Counter
from os import path

import jieba
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from wordcloud import WordCloud,STOPWORDS,ImageColorGenerator

# Read a poem file and drop blank lines.
def get_poems(name_txt):
    """Read a UTF-8 text file and return its non-blank lines.

    Args:
        name_txt: Path of the text file; passed straight to ``open``.

    Returns:
        A ``(name, lines)`` tuple. ``name`` is the ``name`` attribute of the
        opened file object; ``lines`` is the list of lines (newline removed)
        that contain at least one non-whitespace character.
    """
    with open(name_txt, encoding='utf-8') as f:
        poems_text = f.read()
    # Drop empty AND whitespace-only lines (e.g. a stray full-width space):
    # they hold no record and would break the field split done downstream.
    poems = [line for line in poems_text.split('\n') if line.strip()]
    return f.name, poems

# Concatenate the content field of every poem record.
def mk_single(single_txt):
    """Join the fourth space-separated field of every record into one string.

    Each record is split on single spaces. The expected layout is
    ``title dynasty author content``, so index 3 is the poem text. Only
    that one field is used; anything after a further space is ignored.

    Args:
        single_txt: Iterable of record strings, one poem per item.

    Returns:
        The content fields concatenated without a separator. Records with
        fewer than four fields are skipped rather than raising
        ``IndexError``, so one malformed line does not abort the analysis.
    """
    split_records = (record.split(' ') for record in single_txt)
    return ''.join(fields[3] for fields in split_records if len(fields) > 3)

# Remove punctuation (and every other non-letter item).
def rm_symbol(words_txt):
    """Return the items of ``words_txt`` for which ``str.isalpha()`` is true.

    Args:
        words_txt: Iterable of strings; iterating a ``str`` yields its
            characters one by one.

    Returns:
        A list of the alphabetic items in their original order.
        Punctuation, digits and whitespace are dropped.
    """
    return [item for item in words_txt if item.isalpha()]

# Build the frequency dictionary.
def mk_dict(dic_data):
    """Count how often each item occurs, most frequent first.

    Args:
        dic_data: Iterable of hashable items (here: single characters).

    Returns:
        A ``dict`` mapping each distinct item to its number of occurrences.
        Keys are inserted in descending order of count; items with equal
        counts keep the order in which they first appear in ``dic_data``.
    """
    # Counter tallies in a single pass instead of calling list.count() once
    # per distinct item, and most_common() does the descending sort.
    return dict(Counter(dic_data).most_common())

# Character-frequency statistics for one poem file.
def tj_core(origin_data):
    """Load a poem file and count how often every character occurs.

    Args:
        origin_data: Path of the poem file, passed to ``get_poems``.

    Returns:
        A ``(label, poems, frequencies)`` tuple: ``label`` is the base file
        name up to its first dot, ``poems`` is the list of records returned
        by ``get_poems``, and ``frequencies`` is the dictionary produced by
        ``mk_dict`` for the alphabetic characters of all poem contents.
    """
    file_name, poems = get_poems(origin_data)
    # Use only the base name: splitting the whole path at the first dot
    # gives an empty label for paths such as './李白.txt'.
    label = path.basename(str(file_name)).split('.', 1)[0]
    characters = rm_symbol(mk_single(poems))
    return label, poems, mk_dict(characters)

# Collect the high-frequency characters.
def tj_HFWD(tj_author, tj_poems, tj_dic, pl_min):
    """Return the characters whose per-poem frequency reaches ``pl_min``.

    The frequency of a character is its count divided by the number of
    poems, so it can exceed 1.

    Args:
        tj_author: Unused; kept so the signature matches ``tj_frequence``.
        tj_poems: Sequence of poem records; only its length is used.
        tj_dic: Mapping of character to occurrence count.
        pl_min: Minimum frequency (inclusive) for a character to qualify.

    Returns:
        A ``set`` of qualifying characters; empty when there are no poems.
    """
    num_total = len(tj_poems)  # number of poems
    if num_total == 0:
        # No poems means no meaningful frequency; avoid dividing by zero.
        return set()
    return {
        char for char, count in tj_dic.items()
        if count / num_total >= pl_min
    }

# Print the frequencies.
def tj_frequence(tj_author, tj_poems, tj_dic, pl_min):
    """Print every character whose per-poem frequency reaches ``pl_min``.

    The frequency of a character is its count divided by the number of
    poems. Characters are printed in the iteration order of ``tj_dic``,
    between a BEGIN banner plus header line and an END banner.

    Args:
        tj_author: Label printed in the header line.
        tj_poems: Sequence of poem records; only its length is used.
        tj_dic: Mapping of character to occurrence count.
        pl_min: Minimum frequency (inclusive) for a character to be printed.
    """
    num_total = len(tj_poems)  # number of poems
    print('---------BEGIN---------')
    print(tj_author,'----汉字词频','(>=',pl_min,')')
    if num_total:  # with no poems there is nothing to divide by
        for char, count in tj_dic.items():
            ratio = count / num_total
            if ratio >= pl_min:
                # {:.8f} fixes the output at eight decimal places
                print(char, ' '*2, '{:.8f}'.format(ratio))
    print('----------END----------')

# Generate and display a word cloud.
def pome_cloud(in_poem, in_mask, d, wd_num, wds_max, wdf_siz,
               font_path='/usr/share/fonts/windows10/simsun.ttc'):
    """Build a word cloud from a poem file and show it with matplotlib.

    Args:
        in_poem: Path of the poem file, passed to ``get_poems``.
        in_mask: File name of the mask image.
        d: Directory that contains ``in_mask``.
        wd_num: Token length mode. ``1`` separates every character with a
            space, ``2`` segments the text with ``jieba.cut``; any other
            value passes the text on unsegmented. The value is also handed
            to ``WordCloud`` as ``min_word_length``.
        wds_max: Passed to ``WordCloud`` as ``max_words``.
        wdf_siz: Passed to ``WordCloud`` as ``max_font_size``.
        font_path: Font file handed to ``WordCloud``. Defaults to the path
            that used to be hard-coded, so existing calls are unchanged.
    """
    file_name, poems = get_poems(in_poem)
    # Use only the base name: splitting the whole path at the first dot
    # gives an empty label for paths such as './李白.txt'.
    label = path.basename(str(file_name)).split('.', 1)[0]
    poems_sg = mk_single(poems)
    if wd_num == 1:
        poems_sg = ' '.join(poems_sg)
    elif wd_num == 2:
        poems_sg = ' '.join(jieba.cut(poems_sg))
    # Copy the pixels into an array inside the context manager so the image
    # file handle is closed as soon as the data has been read.
    with Image.open(path.join(d, in_mask)) as mask_image:
        mask_png = np.array(mask_image)
    wa = WordCloud(background_color="white",
                   font_path=font_path,
                   max_words=wds_max,
                   mask=mask_png,
                   max_font_size=wdf_siz,
                   min_word_length=wd_num).generate(poems_sg)
    print(label,'----词云',wd_num)
    plt.imshow(wa,interpolation="bilinear")
    plt.axis("off")
    plt.show()

#### Collect the data ####
def _print_char_set(title, chars, per_line=10):
    """Print ``title`` filled with ``len(chars)``, then the characters.

    Characters are separated by a space with a line break after every
    ``per_line`` of them; a blank-line separator is printed at the end.
    """
    print(title.format(len(chars)))
    for position, char in enumerate(chars, start=1):
        print(char, end = ' ')
        if position % per_line == 0:
            print()
    print('\n')


min_f = 0.1
LB = tj_core('李白.txt')
DF = tj_core('杜甫.txt')
LB_frequent = tj_HFWD(LB[0], LB[1], LB[2], min_f)
DF_frequent = tj_HFWD(DF[0], DF[1], DF[2], min_f)

#### Output of homework 5
### Character frequencies (>= min_f)
tj_frequence(LB[0], LB[1], LB[2], min_f)
print('\n')

tj_frequence(DF[0], DF[1], DF[2], min_f)
print('\n')

# Characters both poets use frequently
_print_char_set('两位诗人都爱用的字有{}个,这些字为:', LB_frequent & DF_frequent)

# Characters frequent for Li Bai but not for Du Fu
_print_char_set('李白爱用但杜甫不爱用的字有{}个,这些字为:', LB_frequent - DF_frequent)

# Characters frequent for Du Fu but not for Li Bai
_print_char_set('杜甫爱用但李白不爱用的字有{}个,这些字为:', DF_frequent - LB_frequent)

# Output of homework 7
poem_path = '/home/feng'
poem_mask="libai.png"
# Show the raw mask first; the context manager closes the image file once
# its pixels have been copied into the array.
with Image.open(path.join(poem_path,poem_mask)) as mask_image:
    mask_png = np.array(mask_image)
plt.imshow(mask_png,interpolation="bilinear")
plt.axis("off")
plt.show()
# One cloud per poet and per token mode (1 = single characters, 2 = jieba).
for poem_file in ('李白.txt', '杜甫.txt'):
    for token_mode in (1, 2):
        pome_cloud(poem_file, poem_mask, poem_path, token_mode, 100, 40)

2. 利用map、filter、reduce、lambda函数,对一个元素为数字字符串的列表l,分别求出其中包含的奇数和偶数的乘积。

Python HomeWork2
# Homework 8, task 2: products of the even and of the odd numbers.
import functools
import operator


def parity_products(digit_strings):
    """Return ``(even_product, odd_product)`` for a list of integer strings.

    Args:
        digit_strings: Iterable of strings that ``int`` can parse.

    Returns:
        A tuple of the product of all even values and the product of all
        odd values. A group with no members yields ``1`` (the empty
        product) instead of making ``reduce`` fail on an empty sequence.
    """
    # Convert once and reuse the numbers for both filters.
    numbers = list(map(int, digit_strings))
    even_product = functools.reduce(
        operator.mul, filter(lambda x: x % 2 == 0, numbers), 1)
    odd_product = functools.reduce(
        operator.mul, filter(lambda x: x % 2 == 1, numbers), 1)
    return even_product, odd_product


l=['1','2','3','14','20','5','7','39','150','6']
even_mul, odd_mul = parity_products(l)
print('偶数之积=',even_mul,'奇数之积=',odd_mul)