Commit cc23db60 authored by reeves's avatar reeves 🛀🏻

添加注释

parent 059c3a22
...@@ -22,7 +22,8 @@ def save_to_db(url, html): ...@@ -22,7 +22,8 @@ def save_to_db(url, html):
} }
last_id = db.table_insert('sinatilte', item) last_id = db.table_insert('sinatilte', item)
# 输入为 response 经 etree.HTML 解析后的对象
# 利用 xpath 表达式 //a 获取文档中所有的 a 标签,最后组装为字典数组
def analysisA(htmlText): def analysisA(htmlText):
linklist = htmlText.xpath("//a") linklist = htmlText.xpath("//a")
news_linkDics = [] news_linkDics = []
...@@ -42,7 +43,8 @@ def crawl(): ...@@ -42,7 +43,8 @@ def crawl():
} }
htmlText = requests.get(sina_url, headers=_headers).text.encode( htmlText = requests.get(sina_url, headers=_headers).text.encode(
'iso-8859-1').decode('utf-8') 'iso-8859-1').decode('utf-8')
# 提取 response 中 a 标签的 href 和文本,
# 得到的结果为 [{"url":"text"}]
DicArray = analysisA(etree.HTML(htmlText)) DicArray = analysisA(etree.HTML(htmlText))
news_links = [] news_links = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment