import chardet
# Fetch the page and let chardet guess the encoding from the raw response bytes.
# The timeout keeps the script from blocking forever if the server never answers.
r = requests.get("https://weiboscope.jmsc.hku.hk/latest.php", timeout=10)
# Fail loudly on an HTTP error instead of analysing an error page.
r.raise_for_status()
print(chardet.detect(r.content))
# Override the encoding that requests will use when decoding r.text.
r.encoding = "UTF-8"
XPath
Get attributes of an element
from lxml import html
# Download the page; the timeout prevents an indefinite hang on a dead server.
r = requests.get("https://weiboscope.jmsc.hku.hk/latest.php", timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page.
r.raise_for_status()
# This is a new response object, so the encoding has to be set again before
# reading r.text; otherwise requests decodes with whatever it infers from the
# headers, which can garble non-ASCII text.
r.encoding = "UTF-8"
tree = html.fromstring(r.text)
# Every <a> element that is a direct child of an <li>.
urllist = tree.xpath('//li/a')
# Every text node that is a direct child of an <li>.
contentlist = tree.xpath('//li/text()')
# Get the text and the href attribute of the first link, if there is one.
if urllist:
    first_link = urllist[0]
    print(first_link.text)
    # .get() yields None rather than raising when the anchor has no href.
    print(first_link.attrib.get('href'))
else:
    print("No <li><a> links found")