Commit cc23db60 authored by reeves 🛀🏻

添加注释

parent 059c3a22
......@@ -22,7 +22,8 @@ def save_to_db(url, html):
}
last_id = db.table_insert('sinatilte', item)
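# Input: the parsed HTML tree (the caller passes etree.HTML(...)), not a raw response.
# Uses the XPath query //a to collect every <a> tag in the document, then assembles them into a list of dicts.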
def analysisA(htmlText):
linklist = htmlText.xpath("//a")
news_linkDics = []
......@@ -42,7 +43,8 @@ def crawl():
}
htmlText = requests.get(sina_url, headers=_headers).text.encode(
'iso-8859-1').decode('utf-8')
# Extract the href and the text of every <a> tag in the response.
# The result looks like [{"url": "text"}]
DicArray = analysisA(etree.HTML(htmlText))
news_links = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment