用python寫一個cnBeta閱讀器

我個人平時喜歡逛cnBeta和百度貼吧，我利用之前寫百度貼吧客戶端的code，寫了一個cnBeta的閱讀器。

用python寫一個百度貼吧客戶端

由于cnBeta http://www.cnbeta.com/ 電腦端廣告實在太多，要想閱讀新聞和評論實在十分費時，于是我用Python抓取手機版 http://m.cnbeta.com/wap 的內容，方便大家閱讀。

功能與界面與我之前的百度python客戶端十分相似。

一打開便會顯示首頁的最新新聞，如果想看第2頁的新聞則輸入 s 2，以此類推。

s 2

閱讀某一個新聞，則輸入t index , 比如查看index 為1 的新聞

t 1

不用你自己親自查看評論，程序會抓取所有評論直接顯示在文章下方。

輸入 b 可以返回新聞列表。

由于我自己不喜歡評論，所以我沒有添加評論該新聞的功能。想要加評論功能，也很簡單，可以參考我的百度客戶端的文章。

新增預覽圖片功能

輸入 pic，可以打開由PyQt庫寫的一個小窗口，用來預覽該新聞內的圖片，并且可以上下翻頁。

以下附上code：

# coding=utf-8
import sys
import pycurl
import os
import time
from StringIO import StringIO
import re
import lxml.html
import unicodedata
from PyQt4.QtGui import *
from PyQt4 import QtGui
from colorama import Fore, Back, Style,init
from termcolor import colored


# class definition

class Example(QtGui.QWidget):
    """PyQt4 window that previews a list of image URLs one at a time.

    Each image is downloaded on demand with pycurl. The 'Next' and
    'Previous' buttons walk through ``url_list`` and wrap around at both ends.
    """

    # NOTE(review): the proxy address and account are hard-coded in the source.
    # 192.168.87.15 is a private address, so this presumably only works on the
    # author's network -- consider moving these values to configuration.
    PROXY_URL = 'http://192.168.87.15:8080'
    PROXY_USERPWD = 'LL66269:'

    def __init__(self, all_pic_list):
        """Build the window and display the first image.

        all_pic_list -- sequence of image URLs. An empty sequence or None is
                        accepted; the window then shows a placeholder text.
        """
        super(Example, self).__init__()
        self.url_list = all_pic_list if all_pic_list is not None else []
        self.current_pic_index = 0
        self.initUI()

    def initUI(self):
        """Create the image label and the two navigation buttons, then show the window."""
        QtGui.QToolTip.setFont(QtGui.QFont('Test', 10))
        self.setToolTip('This is a <b>QWidget</b> widget')

        # Label that carries the current picture.
        self.pic = QtGui.QLabel(self)
        self.pic.setGeometry(0, 0, 600, 500)
        self._show_current()

        btn_next = QtGui.QPushButton('Next', self)
        btn_next.setToolTip('This is a <b>QPushButton</b> widget')
        btn_next.resize(btn_next.sizeHint())
        btn_next.clicked.connect(self.fun_next)
        btn_next.move(300, 50)

        btn_prev = QtGui.QPushButton('Previous', self)
        btn_prev.setToolTip('This is a <b>QPushButton</b> widget')
        btn_prev.resize(btn_prev.sizeHint())
        btn_prev.clicked.connect(self.fun_prev)
        btn_prev.move(50, 50)

        self.setGeometry(300, 300, 500, 500)
        self.setWindowTitle('ImgViewer')
        self.show()

    def retrieve_from_url(self, pic_url):
        """Download ``pic_url`` through the proxy and return the raw response body.

        Raises pycurl.error when the transfer fails. The curl handle is closed
        in every case so a failed download does not leak it.
        """
        buffer = StringIO()
        c = pycurl.Curl()
        try:
            c.setopt(pycurl.PROXY, self.PROXY_URL)
            c.setopt(pycurl.PROXYUSERPWD, self.PROXY_USERPWD)
            c.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_NTLM)
            c.setopt(pycurl.URL, pic_url)
            c.setopt(c.WRITEDATA, buffer)
            c.perform()
        finally:
            c.close()
        return buffer.getvalue()

    def _show_current(self):
        """Load the image at ``current_pic_index`` into the label."""
        if not self.url_list:
            # Nothing to download; avoid indexing an empty list.
            self.pic.setText('No images to display')
            return
        try:
            data = self.retrieve_from_url(self.url_list[self.current_pic_index])
        except pycurl.error as err:
            # Keep the window usable when one download fails.
            self.pic.setText('Failed to load image: %r' % (err,))
            return
        pixmap = QtGui.QPixmap()
        pixmap.loadFromData(data)
        self.pic.setPixmap(pixmap)

    def _step(self, delta):
        """Move ``delta`` positions through ``url_list`` (wrapping) and redraw."""
        if not self.url_list:
            return
        # Modulo gives the wrap-around in both directions: -1 becomes the last index.
        self.current_pic_index = (self.current_pic_index + delta) % len(self.url_list)
        self._show_current()

    # Slot connected to the 'Next' button.
    def fun_next(self):
        """Show the next image; after the last one, start again at the first."""
        self._step(1)

    # Slot connected to the 'Previous' button.
    def fun_prev(self):
        """Show the previous image; before the first one, jump to the last."""
        self._step(-1)


def main(all_pic_list):
    """Run the image viewer as a stand-alone Qt application.

    all_pic_list -- image URLs handed to the ``Example`` window.

    Does not return: the process exits with the Qt event loop's return code.
    """
    qt_app = QtGui.QApplication(sys.argv)
    # Bound to a name so the window stays referenced while the event loop runs.
    viewer = Example(all_pic_list)
    exit_code = qt_app.exec_()
    sys.exit(exit_code)


#---------------------------------------------
class Browser_cnbeta:
    """Console reader for the mobile cnBeta site (http://m.cnbeta.com/wap).

    The instance keeps the news list of the page fetched last
    (``tiezi_link``, ``shouye_titles``, ``display_shouye_list``) and the image
    URLs of the article opened last (``current_thread_pic_list``).
    """

    # NOTE(review): the proxy address and account are hard-coded in the source.
    # 192.168.87.15 is a private address, so this presumably only works on the
    # author's network -- consider moving these values to configuration.
    PROXY_URL = 'http://192.168.87.15:8080'
    PROXY_USERPWD = 'LL66269:'
    USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'

    # One curl handle reused for every request; it is created when the class
    # body is executed and closed by exit().
    c = pycurl.Curl()

    def __init__(self):
        """Show the welcome banner, wait two seconds and load the first news page."""
        self._clear_screen()
        # Raw string: the last banner row ends in a backslash, which a normal
        # string literal would swallow as a line continuation.
        print(r"""
        
               _                       _                     ____  _____ _____  _    
 __      _____| | ___ ___  _ __ ___   | |_ ___     ___ _ __ | __ )| ____|_   _|/ \   
 \ \ /\ / / _ \ |/ __/ _ \| '_ ` _ \  | __/ _ \   / __| '_ \|  _ \|  _|   | | / _ \  
  \ V  V /  __/ | (_| (_) | | | | | | | || (_) | | (__| | | | |_) | |___  | |/ ___ \ 
   \_/\_/ \___|_|\___\___/|_| |_| |_|  \__\___/   \___|_| |_|____/|_____| |_/_/   \_\
                                                                                     
made by bigtrace
http://www.reibang.com/p/f04e514c2902
7/20/2017
        """)
        time.sleep(2)
        self.read_shouye(1)

    def _clear_screen(self):
        """Clear the console with the command that fits the current OS."""
        os.system('cls' if os.name == 'nt' else 'clear')

    def _configure_curl(self):
        """Apply proxy, redirect, error and user-agent options to the shared handle."""
        self.c.setopt(pycurl.PROXY, self.PROXY_URL)
        self.c.setopt(pycurl.PROXYUSERPWD, self.PROXY_USERPWD)
        self.c.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_NTLM)
        self.c.setopt(self.c.FOLLOWLOCATION, 1)
        self.c.setopt(pycurl.VERBOSE, 0)
        self.c.setopt(pycurl.FAILONERROR, True)
        self.c.setopt(pycurl.USERAGENT, self.USER_AGENT)

    def _fetch_document(self, url):
        """Download ``url`` and return it parsed as an lxml HTML tree.

        The body is decoded as UTF-8 and undecodable bytes are dropped.
        Raises pycurl.error when the transfer fails.
        """
        buf = StringIO()
        self.c.setopt(pycurl.URL, url)
        self.c.setopt(self.c.WRITEDATA, buf)
        self.c.perform()
        body = buf.getvalue().decode('utf-8', 'ignore')
        return lxml.html.fromstring(body)

    def _pad_to_width(self, text, width):
        """Right-pad ``text`` with spaces until it occupies ``width`` columns.

        Wide (East Asian) characters count as two columns. Text that is already
        wider than ``width`` is returned unchanged.
        """
        columns = len(text) + self.wide_chars(text)
        return text + u" " * max(0, width - columns)

    def wide_chars(self, s):
        """Return how many characters of ``s`` are East Asian wide or full-width.

        This is the number of extra terminal columns ``s`` needs on top of
        ``len(s)``. Byte strings are decoded as UTF-8 first.
        """
        if isinstance(s, bytes):
            s = s.decode('utf-8')
        return sum(unicodedata.east_asian_width(x) in ('F', 'W') for x in s)

    def read_shouye(self, index):
        """Fetch news list page ``index``, print it and remember its entries.

        Sets ``tiezi_link`` (article URLs), ``shouye_titles`` (titles) and
        ``display_shouye_list`` (the encoded rows as printed), all indexed by
        the number shown in front of each row.
        """
        self._clear_screen()
        self._configure_curl()

        url_tbs = 'http://m.cnbeta.com/wap/index.htm?page=' + str(index)
        print(colored(url_tbs, 'blue'))
        print(colored("\n---------------------", 'green'))

        doc = self._fetch_document(url_tbs)
        news_list = doc.xpath("//div[@class='list']")

        Header_list = []
        link_list = []
        display_shouye = []

        self.header_max_width = 12
        self.title_max_width = 70
        for each_news in news_list:
            hrefs = each_news.xpath(".//a/@href")
            anchors = each_news.xpath(".//a")
            if not hrefs or not anchors:
                # An entry without a link cannot be opened; skip it instead of
                # aborting the whole listing.
                continue

            # The displayed index is the position in link_list, so it stays
            # valid even when entries were skipped.
            i = len(link_list)
            title = anchors[0].text_content()
            Header_list.append(title)
            link_list.append("http://m.cnbeta.com" + hrefs[0])

            # Pad on the visible text only: the colour escape codes take no
            # screen columns and must not count towards the column width.
            visible_header = "index " + str(i)
            padding = " " * max(0, self.header_max_width - len(visible_header))
            header_cell = "index " + colored(str(i), 'yellow') + padding

            try:
                title_cell = self._pad_to_width(":   " + title, self.title_max_width)
                each_display = (header_cell + title_cell).encode("gb18030")
            except UnicodeError:
                each_display = (header_cell + "Title can't be displayed").encode("gb18030")

            print(each_display)
            display_shouye.append(each_display)
            print("")

        self.tiezi_link = link_list
        self.shouye_titles = Header_list
        self.display_shouye_list = display_shouye

        print(colored("\n---------------------", 'green'))

    def read_each_news(self, index):
        """Print the article listed under ``index``, followed by its comments.

        ``index`` may be an int or a digit string. Raises IndexError when it is
        outside the list fetched last. The image URLs found in the article are
        stored in ``current_thread_pic_list``.
        """
        # Look the entry up before clearing, so a bad index leaves the list visible.
        position = int(index)
        link = self.tiezi_link[position]
        title = self.shouye_titles[position]

        self._clear_screen()
        print("===================================================\n\n\n")
        print(colored(title, 'magenta') + colored("  <" + link + "> \n", 'blue'))

        doc = self._fetch_document(link)

        time_spans = doc.xpath("//div[@class='time']/span")
        time_subtitle = "".join(span.text_content() for span in time_spans)

        print("")
        print(colored(time_subtitle.encode("gb18030"), 'cyan'))
        print("")

        self.current_thread_pic_list = []
        for each_paragraph in doc.xpath("//div[@class='content']/p"):
            print("")
            # Replace non-breaking spaces with plain spaces before printing.
            print(each_paragraph.text_content().replace(u'\xa0', u' '))
            for each_img in each_paragraph.xpath(".//img/@src"):
                print(colored("<img url: " + each_img + ">", 'yellow'))
                self.current_thread_pic_list.append(each_img)

        blockquotes = doc.xpath("//div[@class='content']/blockquote")
        for j, each_blockquote in enumerate(blockquotes, 1):
            print("blockquote <" + str(j) + "> ~~~~~~~~~~~\n")
            print(each_blockquote.text_content())
            print("~~~~~~~~~~~~~~~~~~~~~~~~~~\n")

        self.view_comment(link)

    def Get_Back_To_shouye(self):
        """Clear the screen and re-print the news list fetched last (no new request)."""
        self._clear_screen()
        for each_display in getattr(self, 'display_shouye_list', []):
            print(each_display)

    def exit(self):
        """Close the curl handle and show the goodbye banner for one second."""
        self.c.close()
        self._clear_screen()
        print(r"""
 _                    _                 
| |                  | |                
| |__  _   _ _____   | |__  _   _ _____ 
|  _ \| | | | ___ |  |  _ \| | | | ___ |
| |_) ) |_| | ____|  | |_) ) |_| | ____|
|____/ \__  |_____)  |____/ \__  |_____)
      (____/               (____/       

""")
        time.sleep(1)
        self._clear_screen()

    def view_comment(self, url):
        """Print the first comment page of the article at ``url``.

        The article id is the first run of digits in ``url``; the comments are
        read from http://m.cnbeta.com/wap/comment/<id>.htm?page=1.
        """
        tid_match = re.search(r"(\d+)", url)
        if tid_match is None:
            print("no article id found in " + url)
            return
        comment_url = "http://m.cnbeta.com/wap/comment/" + tid_match.group(1) + ".htm?page="

        doc = self._fetch_document(comment_url + "1")
        comment_blocks = doc.xpath("//div[@class='content']")

        print(colored("\n--------------- comment ---------------", 'green'))
        if comment_blocks:
            print(comment_blocks[0].text_content())
        print(colored("--------------- finished ---------------", 'green'))

    def view_image(self):
        """Open the picture viewer for the article opened last.

        Returns once the viewer window is closed. Does nothing but print a
        hint when no article with pictures has been opened yet.
        """
        pictures = getattr(self, 'current_thread_pic_list', None)
        if not pictures:
            print("no pictures to show, open a news item with pictures first")
            return

        print("launch picture viewer...")
        # Reuse an existing QApplication so the viewer can be opened repeatedly.
        viewer_app = QtGui.QApplication.instance() or QtGui.QApplication(sys.argv)
        # Bound to a name so the window stays referenced while the event loop runs.
        ex = Example(pictures)
        viewer_app.exec_()


app = Browser_cnbeta()

# Interactive command loop.
#   s <page>   show news list page <page>
#   t <index>  open the news item listed under <index>
#   b          re-print the current news list
#   c          clear the screen
#   pic        preview the pictures of the news item opened last
#   e          leave the loop and quit
while True:
    print("""



    """)
    nb = raw_input('Give me your command: \n')
    try:
        if nb.startswith('s '):
            page_match = re.search(r"s (\d+)", nb)
            if page_match:
                app.read_shouye(page_match.group(1))
            else:
                print("type correct command")
        elif nb.startswith('t '):
            index_match = re.search(r"t\s+(\d+)", nb)
            if index_match:
                app.read_each_news(index_match.group(1))
            else:
                print("type correct command")
        elif nb == "b":
            app.Get_Back_To_shouye()
        elif nb == "c":
            os.system('cls' if os.name == 'nt' else 'clear')
        elif nb == "e":
            break
        elif nb == "pic":
            app.view_image()
        else:
            print("type correct command")
    except KeyboardInterrupt:
        # Ctrl-C aborts the running command and returns to the prompt.
        print("")
    except SystemExit:
        # A command may raise SystemExit (for example when the picture viewer
        # window is closed); that must not quit the reader.
        pass
    except Exception as err:
        # Report the failure instead of hiding it behind an empty line.
        print("command failed: %r" % (err,))


app.exit()

最后編輯于：2017.12.09 01:30:53

©著作權歸作者所有，轉載或內容合作請聯繫作者

人面猴
序言：七十年代末，一起剝皮案震驚了整個濱河市药薯，隨后出現(xiàn)的幾起案子绑洛，更是在濱河造成了極大的恐慌，老刑警劉巖童本，帶你破解...
沈念sama閱讀 217,084評論 6贊 503
死咒
序言：濱河連續(xù)發(fā)生了三起死亡事件真屯，死亡現(xiàn)場離奇詭異，居然都是意外死亡穷娱，警方通過查閱死者的電腦和手機(jī)绑蔫，發(fā)現(xiàn)死者居然都...
沈念sama閱讀 92,623評論 3贊 392
救了他兩次的神仙讓他今天三更去死
文/潘曉璐我一進(jìn)店門，熙熙樓的掌柜王于貴愁眉苦臉地迎上來泵额，“玉大人晾匠，你說我怎么就攤上這事√莞眨” “怎么了？”我有些...
開封第一講書人閱讀 163,450評論 0贊 353
道士緝兇錄：失蹤的賣姜人
文/不壞的土叔我叫張陵薪寓，是天一觀的道長亡资。經(jīng)常有香客問我，道長向叉，這世上最難降的妖魔是什么锥腻？我笑而不...
開封第一講書人閱讀 58,322評論 1贊 293
?港島之戀（遺憾婚禮）
正文為了忘掉前任，我火速辦了婚禮母谎，結(jié)果婚禮上瘦黑，老公的妹妹穿的比我還像新娘。我一直安慰自己奇唤，他們只是感情好幸斥，可當(dāng)我...
茶點故事閱讀 67,370評論 6贊 390
惡毒庶女頂嫁案：這布局不是一般人想出來的
文/花漫我一把揭開白布。她就那樣靜靜地躺著咬扇，像睡著了一般甲葬。火紅的嫁衣襯著肌膚如雪。梳的紋絲不亂的頭發(fā)上懈贺，一...
開封第一講書人閱讀 51,274評論 1贊 300
城市分裂傳說
那天经窖，我揣著相機(jī)與錄音，去河邊找鬼梭灿。笑死画侣，一個胖子當(dāng)著我的面吹牛，可吹牛的內(nèi)容都是我干的堡妒。我是一名探鬼主播配乱，決...
沈念sama閱讀 40,126評論 3贊 418
雙鴛鴦連環(huán)套：你想象不到人心有多黑
文/蒼蘭香墨我猛地睜開眼，長吁一口氣：“原來是場噩夢啊……” “哼！你這毒婦竟也來了宪卿？” 一聲冷哼從身側(cè)響起的诵，我...
開封第一講書人閱讀 38,980評論 0贊 275
萬榮殺人案實錄
序言：老撾萬榮一對情侶失蹤，失蹤者是張志新（化名）和其女友劉穎佑钾，沒想到半個月后西疤，有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體，經(jīng)...
沈念sama閱讀 45,414評論 1贊 313
?護(hù)林員之死
正文獨居荒郊野嶺守林人離奇死亡休溶，尸身上長有42處帶血的膿包…… 初始之章·張勛以下內(nèi)容為張勛視角年9月15日...
茶點故事閱讀 37,599評論 3贊 334
?白月光啟示錄
正文我和宋清朗相戀三年代赁，在試婚紗的時候發(fā)現(xiàn)自己被綠了。大學(xué)時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片兽掰。...
茶點故事閱讀 39,773評論 1贊 348
活死人
序言：一個原本活蹦亂跳的男人離奇死亡芭碍，死狀恐怖，靈堂內(nèi)的尸體忽然破棺而出孽尽，到底是詐尸還是另有隱情窖壕，我是刑警寧澤，帶...
沈念sama閱讀 35,470評論 5贊 344
?日本核電站爆炸內(nèi)幕
正文年R本政府宣布杉女，位于F島的核電站瞻讽，受9級特大地震影響，放射性物質(zhì)發(fā)生泄漏熏挎。R本人自食惡果不足惜速勇，卻給世界環(huán)境...
茶點故事閱讀 41,080評論 3贊 327
男人毒藥：我在死后第九天來索命
文/蒙蒙一、第九天我趴在偏房一處隱蔽的房頂上張望坎拐。院中可真熱鬧烦磁，春花似錦、人聲如沸哼勇。這莊子的主人今日做“春日...
開封第一講書人閱讀 31,713評論 0贊 22
一樁弒父案，背后竟有這般陰謀
文/蒼蘭香墨我抬頭看了看天上的太陽积担。三九已至院溺，卻和暖如春，著一層夾襖步出監(jiān)牢的瞬間磅轻，已是汗流浹背珍逸。一陣腳步聲響...
開封第一講書人閱讀 32,852評論 1贊 269
情欲美人皮
我被黑心中介騙來泰國打工，沒想到剛下飛機(jī)就差點兒被人妖公主榨干…… 1. 我叫王不留聋溜，地道東北人谆膳。一個月前我還...
沈念sama閱讀 47,865評論 2贊 370
代替公主和親
正文我出身青樓，卻偏偏與公主長得像撮躁，于是被迫代替她去往敵國和親漱病。傳聞我的和親對象是個殘疾皇子，可洞房花燭夜當(dāng)晚...
茶點故事閱讀 44,689評論 2贊 354

用python寫一個cnBeta閱讀器

新增預覽圖片功能

推薦閱讀更多精彩內(nèi)容