[Python Crawler] Scraping City Legends (chengshichuanqi) team and player data
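The script below pulls team rosters and match fixtures for a single division from the site's JSON endpoints and rendered HTML pages, then writes the player data to a CSV file. The request URLs are kept as the relative paths from the original post; the commented-out blocks near the top are exploratory requests used while inspecting the two kinds of endpoints.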

import requests, json, csv
from lxml import etree


# url = '/match/team_players.htm?divisionId=874902863023837184&teamId=892361870411960321'
# res = requests.get(url).text
# print(res)
# select = etree.HTML(res)
# name = select.xpath('//span[@class="player-name-value player-short-words"]/text()')
# print(name)

# url = "/match/team_players_json.htm?divisionId=874902863023837184&teamId=892361870411960321&page=2"
# res = requests.post(url).text
# data = json.loads(res)['data']
# count = data['count']
# print(count)
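# Both JSON endpoints respond with a payload of the form {"data": {"count": <total>, "list": [...]}};
# 'count' is the total number of records and drives the pagination below.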


def post_team_name(url):
    try:
        res = requests.post(url.format(1)).text
        data = json.loads(res)['data']
        count = data['count']
        # print(count % 10)
        # If the last page is not full, integer division needs +1; the extra +1
        # is because range()'s end value is exclusive.
        pages = count // 10 + 2 if count % 10 > 0 else count // 10 + 1
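        # e.g. count = 23 -> 23 // 10 + 2 = 4, so range(1, 4) covers pages 1, 2 and 3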
        # print(pages)
        for page in range(1,pages):
            page_res = requests.post(url.format(page)).text
            page_data = json.loads(page_res)['data']
            for team in page_data['list']:  # avoid shadowing the built-in name 'list'
                teamName = team['teamName']
                teamId = team['teamId']
                teamPic = team['teamPic']
                # print(teamName, teamId, teamPic)

                # The site exposes both a JSON endpoint and a rendered HTML page,
                # so the data can be parsed either way.
                # Parse via JSON:
                # post_players_information(teamId,teamName)

                # Parse via XPath:
                post_xpath_players_information(teamId,teamName)
    except Exception as e:
        print("post_team_name parsing error:", e)

# Fetch player data from the JSON endpoint
def post_players_information(teamId,teamName):
    try:
        play_url = "/match/team_players_json.htm?divisionId=874902863023837184&teamId={}".format(
            teamId)
        res = requests.post(play_url).text
        play_data = json.loads(res)['data']
        count = play_data['count']
        # print(count)
        pages = count // 10 + 2 if count % 10 > 0 else count // 10 + 1
        # print(pages)
        for page in range(1,pages):
            url = "/match/team_players_json.htm?divisionId=874902863023837184&teamId={}&page={}".format(
                teamId,page)
            res = requests.post(url).text
            play_data = json.loads(res)['data']
            for player in play_data['list']:  # avoid shadowing the built-in name 'list'
                print(teamName, teamId, count, player['nickname'], player['clothNo'],
                      player['area'], player['age'], player['height'], player['weight'])
                writer.writerow((teamName, teamId, count, player['nickname'], player['clothNo'],
                                 player['area'], player['age'], player['height'], player['weight']))
    except Exception as e:
        print("post_players_information parsing error:", e)

# Fetch player data by parsing the HTML pages with XPath
def post_xpath_players_information(teamId,teamName):

    try:
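        # Query the JSON endpoint first just to get the total player count for pagination.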
        play_url = "/match/team_players_json.htm?divisionId=874902863023837184&teamId={}".format(
            teamId)
        res = requests.post(play_url).text
        play_data = json.loads(res)['data']
        count = play_data['count']
        # print(count)
        pages = count // 10 + 2 if count % 10 > 0 else count // 10 + 1
        # print(pages)
        for page in range(1, pages):
            url = "/match/team_players.htm?divisionId=874902863023837184&teamId={}&page={}".format(teamId,page)
            res = requests.post(url).text
            select = etree.HTML(res)
            # Each player sits in a div.player-detail block (kept for reference, not used below)
            data = select.xpath('//div[@class="player-detail"]')
            # Player names
            names = select.xpath('//span[@class="player-name-value player-short-words"]/text()')
            # Jersey numbers
            nums = select.xpath('//span[@class="player-num-value"]/text()')
            # Regions
            addresss = select.xpath('//span[@class="player-address-value player-short-words"]/text()')
            # Ages
            ages = select.xpath('//span[@class="player-age-value"]/text()')
            # Heights
            heights = select.xpath('//span[@class="player-height-value"]/text()')
            # Weights
            weights = select.xpath('//span[@class="player-weight-value"]/text()')
            for i in range(0,len(nums)):
                name = names[i]
                num = nums[i]
                address = addresss[i]
                age = ages[i]
                height = heights[i]
                weight = weights[i]
                print(teamName, teamId, count, name, num, address, age, height, weight)

                writer.writerow((teamName, teamId, count, name, num, address, age, height, weight))
    except Exception as e:
        print("post_xpath_players_information parsing error:", e)



# Team matchup (fixture) information
def post_team_game(url):
    try:
        res = requests.post(url).text
        select = etree.HTML(res)

        game_times = select.xpath('//div[@class="event-time"]/span[1]/text()')
        game_sorts = select.xpath('//div[@class="event-time"]/span[2]/text()')
        team_one_names = select.xpath('//div[@class="team-one"]/span[@class="team-name"]/text()')
        team_one_scores = select.xpath('//div[@class="team-one"]/span[@class="team-score"]/text()')
        team_two_names = select.xpath('//div[@class="team-two"]/span[@class="team-name"]/text()')
        team_two_scores = select.xpath('//div[@class="team-two"]/span[@class="team-score"]/text()')

        for i in range(0,len(game_times)):
            game_time = game_times[i]
            game_sort = game_sorts[i]
            team_one_name = team_one_names[i]
            team_one_score = team_one_scores[i]
            team_two_name = team_two_names[i]
            team_two_score = team_two_scores[i]
            print('{} {}: {} {} pts vs {} {} pts'.format(game_time, game_sort, team_one_name, team_one_score, team_two_name, team_two_score))

    except Exception as e:
        print("post_team_game parsing error:", e)



if __name__ == "__main__":
    # newline='' prevents the csv module from writing blank rows on Windows
    f = open('chengshichuanqi.csv', 'w', encoding='utf-8', newline='')
    writer = csv.writer(f)
    writer.writerow(('team name', 'team id', 'player count', 'player name', 'jersey number',
                     'player region', 'player age', 'player height', 'player weight'))
    base_url = '/match/join_teams_json.htm?divisionId=874902863023837184&page={}'
    game_url = '/match/success_events.htm?divisionId=874902863023837184'
    post_team_game(game_url)
    post_team_name(base_url)
    f.close()
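
As an aside, the count // 10 expression that appears three times above is just a ceiling division plus the off-by-one adjustment that range() needs. A minimal sketch of the same calculation with math.ceil, using a hypothetical helper name total_pages and assuming the page size stays at 10:

import math

def total_pages(count, page_size=10):
    # Number of pages needed to hold `count` records (ceiling division).
    return math.ceil(count / page_size)

# The loops above would then read: for page in range(1, total_pages(count) + 1)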
(Two screenshots of the script's output, dated 2017-08-08, omitted.)