Go語言基礎08——HTTP編程

請求報文格式分析

package main

import (
    "fmt"
    "net"
)

// main listens on 127.0.0.1:8000 and, for every TCP connection it accepts,
// prints the bytes of the first read. Pointing a browser at the address shows
// what a raw HTTP request looks like.
func main() {
    fmt.Println("Http請求包格式演示案例")
    listener, err := net.Listen("tcp", "127.0.0.1:8000") // start listening
    if err != nil {
        fmt.Println("err:", err)
        return
    }

    defer listener.Close()

    for {
        // Block until a client connects.
        conn, err := listener.Accept()
        if err != nil {
            fmt.Println("err:", err)
            return
        }
        dumpRequest(conn)
    }
}

// dumpRequest reads up to 1024 bytes from conn, prints them, and closes conn.
// The work lives in its own function so that the deferred Close runs after
// each connection; a defer placed inside main's endless loop would never run.
func dumpRequest(conn net.Conn) {
    defer conn.Close()

    buff := make([]byte, 1024)
    n, err1 := conn.Read(buff)
    if err1 != nil {
        fmt.Println("err1:", err1)
        return
    }

    fmt.Println("buff = ", string(buff[:n]))
}

瀏覽器訪問:http://127.0.0.1:8000/
控制臺輸出:

GET / HTTP/1.1
Host: 127.0.0.1:8000
Connection: keep-alive
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9

響應報文測試方法

package main

import (
    "fmt"
    "net/http"
)

// myHandler answers every request with the fixed body "hello go!".
func myHandler(w http.ResponseWriter, r *http.Request) {
    greeting := []byte("hello go!")
    w.Write(greeting)
}

// main registers myHandler for the path /go and serves HTTP on 127.0.0.1:8000.
func main() {
    fmt.Println("響應(yīng)報文格式演示案例")
    http.HandleFunc("/go", myHandler)
    // ListenAndServe only returns on failure (for example when the port is
    // already in use), so report the error instead of exiting silently.
    if err := http.ListenAndServe("127.0.0.1:8000", nil); err != nil {
        fmt.Println("err:", err)
    }
}

瀏覽器訪問:http://127.0.0.1:8000/go
頁面顯示:hello go!

http客戶端編程

訪問百度

package main

import (
    "fmt"
    "net/http"
)

// main fetches https://www.baidu.com and prints the status line, status code,
// headers and the Body value of the response.
func main() {
    fmt.Println("http編程演示案例")
    resp, err := http.Get("https://www.baidu.com") // func Get(url string) (resp *Response, err error)
    if err != nil {
        // Println, not Printf: the original format string had no verb, so the
        // error was rendered as "%!(EXTRA ...)".
        fmt.Println("err:", err)
        return
    }
    defer resp.Body.Close()
    fmt.Println("Status = ", resp.Status)
    fmt.Println("StatusCode = ", resp.StatusCode)
    fmt.Println("Header = ", resp.Header)
    // This prints the Body value itself (a reader), not the page content; the
    // content has to be obtained by calling resp.Body.Read.
    fmt.Println("Body = ", resp.Body)
}

輸出結果:

Status =  200 OK
StatusCode =  200
Header =  map[Accept-Ranges:[bytes] Cache-Control:[no-cache] Connection:[Keep-Alive] Content-Length:[227] Content-Type:[text/html] Date:[Wed, 20 Mar 2019 11:10:52 GMT] Etag:["5c7cdb1f-e3"] Last-Modified:[Mon, 04 Mar 2019 08:00:31 GMT] P3p:[CP=" OTI DSP COR IVA OUR IND COM "] Pragma:[no-cache] Server:[BWS/1.1] Set-Cookie:[BD_NOT_HTTPS=1; path=/; Max-Age=300 BIDUPSID=1EB8D042488157FB56779477283469A8; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com PSTM=1553080252; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com] Strict-Transport-Security:[max-age=0] X-Ua-Compatible:[IE=Edge,chrome=1]]
Body =  &{0xc000034080 {0 0} false <nil> 0x60dd50 0x60dcd0}

百度貼吧爬蟲

package main

import (
    "fmt"
    "net/http"
    "os"
    "strconv"
    //"os"
)

// main asks for a first and last page number on standard input and crawls
// that range of Baidu Tieba list pages. Example list URLs:
//
//	http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=50
//	http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=100
func main() {
    fmt.Println("百度貼吧爬蟲編程演示案例")
    var start, end int
    fmt.Printf("輸入起始頁:")
    // Stop on unparsable input rather than crawling with a zero value.
    if _, err := fmt.Scan(&start); err != nil {
        fmt.Println("err:", err)
        return
    }
    fmt.Printf("輸入結(jié)束頁:")
    if _, err := fmt.Scan(&end); err != nil {
        fmt.Println("err:", err)
        return
    }
    doWork(start, end)
}

// doWork fetches pages start through end (inclusive) one after another by
// calling getPage for each page number.
func doWork(start, end int) {
    fmt.Printf("正在爬去 %d - %d 頁的數(shù)據(jù)\n", start, end)
    page := start
    for page <= end {
        getPage(page)
        page++
    }
}

// getPage downloads one Tieba list page and stores it in "<page>.html" in the
// current directory. The pn query parameter is set to page*50. Failures are
// printed and end the call.
func getPage(page int) {
    url := "http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=" + strconv.Itoa(page*50)
    fmt.Println("url = ", url)
    res, err := httpGet(url)
    if err != nil {
        fmt.Println("err:", err)
        return
    }

    // Write the content to a file such as 1.html.
    fileName := strconv.Itoa(page) + ".html"
    f1, err1 := os.Create(fileName)
    if err1 != nil {
        fmt.Println("err1:", err1)
        return
    }
    // The original never closed the file; close it and report a failed close,
    // since a close error can mean the data did not reach the disk.
    defer func() {
        if err := f1.Close(); err != nil {
            fmt.Println("err:", err)
        }
    }()

    if _, err := f1.WriteString(res); err != nil {
        fmt.Println("err:", err)
    }
}
// httpGet performs an HTTP GET on url and returns the complete response body
// as a string.
//
// It returns a non-nil err when the request itself fails, or when the server
// announced a Content-Length and a different number of bytes was received. On
// error res is empty.
func httpGet(url string) (res string, err error) {
    resp, err := http.Get(url) // func Get(url string) (resp *Response, err error)
    if err != nil {
        return "", err
    }
    defer resp.Body.Close()

    var (
        body    []byte
        n       int
        readErr error
    )
    buff := make([]byte, 1024)
    // Keep whatever each Read delivered and stop at the first error. A read of
    // zero bytes with a nil error means "nothing happened yet", not end of
    // body, so it must not end the loop. Collecting the bytes in one slice also
    // avoids re-copying the whole result for every chunk, as string += did.
    for readErr == nil {
        n, readErr = resp.Body.Read(buff)
        body = append(body, buff[:n]...)
    }

    // The error that ends a complete body is io.EOF. The io package is not
    // imported by this program, so a body that was cut short is recognised by
    // comparing against the announced length (-1 means the length is unknown).
    if resp.ContentLength >= 0 && int64(len(body)) != resp.ContentLength {
        return "", fmt.Errorf("reading %s: got %d of %d bytes: %v", url, len(body), resp.ContentLength, readErr)
    }
    return string(body), nil
}

并發版網絡爬蟲:

package main

import (
    "fmt"
    "net/http"
    "os"
    "strconv"
    //"os"
)

// mPage is an unbuffered channel of page numbers: getPage sends its page
// number on it and doWork receives one value per requested page.
var mPage = make(chan int)

// main asks for a first and last page number on standard input and crawls
// that range of Baidu Tieba list pages concurrently. Example list URLs:
//
//	http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=50
//	http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=100
func main() {
    fmt.Println("百度貼吧爬蟲編程演示案例")
    var start, end int
    fmt.Printf("輸入起始頁:")
    // Stop on unparsable input rather than crawling with a zero value.
    if _, err := fmt.Scan(&start); err != nil {
        fmt.Println("err:", err)
        return
    }
    fmt.Printf("輸入結(jié)束頁:")
    if _, err := fmt.Scan(&end); err != nil {
        fmt.Println("err:", err)
        return
    }
    doWork(start, end)
}

// doWork starts one getPage goroutine per page in [start, end] and then
// receives from mPage once per started goroutine, printing each page number
// as it arrives.
func doWork(start, end int) {
    fmt.Printf("正在爬去 %d - %d 頁的數(shù)據(jù)\n", start, end)

    pending := 0
    for page := start; page <= end; page++ {
        go getPage(page)
        pending++
    }

    for ; pending > 0; pending-- {
        fmt.Printf("第 %d頁 爬去完成\n", <-mPage)
    }
}

func getPage(page int) {
    var url string
    url = "http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=" + strconv.Itoa(page*50)
    //fmt.Println("url = ", url)
    fmt.Printf("開始爬去第 %d 頁,%s\n", page, url)
    res, err := httpGet(url)
    if err != nil {
        fmt.Println("err:", err)
        return
    }
    mPage <- page
    // fmt.Println("res:", res)
    // 把內(nèi)容寫在文件里 1.html
    fileName := strconv.Itoa(page) + ".html"
    f1, err1 := os.Create(fileName)
    if err1 != nil {
        fmt.Println("err1:", err1)
        return
    }
    f1.WriteString(res)

}
// httpGet performs an HTTP GET on url and returns the complete response body
// as a string.
//
// It returns a non-nil err when the request itself fails, or when the server
// announced a Content-Length and a different number of bytes was received. On
// error res is empty.
func httpGet(url string) (res string, err error) {
    resp, err := http.Get(url) // func Get(url string) (resp *Response, err error)
    if err != nil {
        return "", err
    }
    defer resp.Body.Close()

    var (
        body    []byte
        n       int
        readErr error
    )
    buff := make([]byte, 1024)
    // Keep whatever each Read delivered and stop at the first error. A read of
    // zero bytes with a nil error means "nothing happened yet", not end of
    // body, so it must not end the loop. Collecting the bytes in one slice also
    // avoids re-copying the whole result for every chunk, as string += did.
    for readErr == nil {
        n, readErr = resp.Body.Read(buff)
        body = append(body, buff[:n]...)
    }

    // The error that ends a complete body is io.EOF. The io package is not
    // imported by this program, so a body that was cut short is recognised by
    // comparing against the announced length (-1 means the length is unknown).
    if resp.ContentLength >= 0 && int64(len(body)) != resp.ContentLength {
        return "", fmt.Errorf("reading %s: got %d of %d bytes: %v", url, len(body), resp.ContentLength, readErr)
    }
    return string(body), nil
}

段子爬蟲

package main

import (
    "fmt"
    "net/http"
    "os"
    "regexp"
    "strconv"
    //"os"
)

// mPage is an unbuffered channel of page numbers: getPage sends its page
// number on it and doWork receives one value per requested page.
var mPage = make(chan int)

// main crawls a fixed page range (page 1 only) of the joke site. Reading the
// range from standard input is disabled in this version.
func main() {
    fmt.Println("段子爬蟲編程演示案例")

    const firstPage, lastPage = 1, 1
    doWork(firstPage, lastPage)
}

// doWork starts one getPage goroutine per page in [start, end] and then
// receives from mPage once per started goroutine, printing each page number
// as it arrives.
func doWork(start, end int) {
    fmt.Printf("正在爬去 %d - %d 頁的數(shù)據(jù)\n", start, end)

    pending := 0
    for page := start; page <= end; page++ {
        go getPage(page)
        pending++
    }

    for ; pending > 0; pending-- {
        fmt.Printf("第 %d頁 爬去完成\n", <-mPage)
    }
}

func getPage(page int) {
    var url string
    url = "http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=" + strconv.Itoa(page*50)
    url = "https://www.pengfue.com/index_" + strconv.Itoa(page) + ".html"
    //fmt.Println("url = ", url)
    fmt.Printf("開始爬去第 %d 頁,%s\n", page, url)
    res, err := httpGet(url)
    if err != nil {
        fmt.Println("err:", err)
        return
    }

    //<a  target="_blank">游泳圈</a>
    reg := regexp.MustCompile(`<h1 class="dp-b"><a href="(?s:(.*?))"`)
    if reg == nil {
        fmt.Println("regexp error.")
        return
    }
    urls := reg.FindAllStringSubmatch(res, -1)

    fileTitle := make([]string, 0)
    fileContent := make([]string, 0)

    // fmt.Println("urls:", urls)
    for k, v := range urls {
        fmt.Println("k,v:", k, v[1])
        res, err := httpGet(v[1])

        if err != nil {
            fmt.Println("err:", err)
            return
        }

        // 處理title
        reg := regexp.MustCompile(`<h1>(?s:(.*?))</h1>`)
        if reg == nil {
            fmt.Println("regexp error.")
            return
        }
        titles := reg.FindAllStringSubmatch(res, 1)
        for _, title := range titles {
            fmt.Println("title:", title[1]) //this is title.
            // 把內(nèi)容存儲到文件中
            fileTitle = append(fileTitle, title[1])
        }

        // 處理content
        regContent := regexp.MustCompile(`<div class="content-txt pt10">(?s:(.*?))<a id="prev"`)
        if regContent == nil {
            fmt.Println("regexp error.")
            return
        }
        contents := regContent.FindAllStringSubmatch(res, 1)
        for _, content := range contents {
            fmt.Println("content:", content[1]) //this is content.
            // 把內(nèi)容存儲到文件中
            fileContent = append(fileContent, content[1])
        }

    }

    //fmt.Println("fileTitle = ", fileTitle)
    //fmt.Println("fileContent = ", fileContent)

    saveToFile(page, fileTitle, fileContent)

    // fmt.Println("res:", res)

    mPage <- page

}

// saveToFile writes the collected entries to "<page>.txt" in the current
// directory. Each entry is written as the title line, the content line and a
// separator line of '=' characters.
//
// fileTitle decides how many entries are written; an entry whose index has no
// element in fileContent gets an empty content line instead of causing an
// index-out-of-range panic. Failures are printed and end the call.
func saveToFile(page int, fileTitle, fileContent []string) {
    fileName := strconv.Itoa(page) + ".txt"
    f1, err1 := os.Create(fileName)
    if err1 != nil {
        fmt.Println("err1:", err1)
        return
    }
    defer func() {
        if err := f1.Close(); err != nil {
            fmt.Println("err:", err)
        }
    }()

    for i, title := range fileTitle {
        content := ""
        if i < len(fileContent) {
            content = fileContent[i]
        }
        entry := title + "\n" + content + "\n" +
            "===================================================================\n"
        if _, err := f1.WriteString(entry); err != nil {
            fmt.Println("err:", err)
            return
        }
    }
}

// httpGet performs an HTTP GET on url and returns the complete response body
// as a string.
//
// It returns a non-nil err when the request itself fails, or when the server
// announced a Content-Length and a different number of bytes was received. On
// error res is empty.
func httpGet(url string) (res string, err error) {
    resp, err := http.Get(url) // func Get(url string) (resp *Response, err error)
    if err != nil {
        return "", err
    }
    defer resp.Body.Close()

    var (
        body    []byte
        n       int
        readErr error
    )
    buff := make([]byte, 1024)
    // Keep whatever each Read delivered and stop at the first error. A read of
    // zero bytes with a nil error means "nothing happened yet", not end of
    // body, so it must not end the loop. Collecting the bytes in one slice also
    // avoids re-copying the whole result for every chunk, as string += did.
    for readErr == nil {
        n, readErr = resp.Body.Read(buff)
        body = append(body, buff[:n]...)
    }

    // The error that ends a complete body is io.EOF. The io package is not
    // imported by this program, so a body that was cut short is recognised by
    // comparing against the announced length (-1 means the length is unknown).
    if resp.ContentLength >= 0 && int64(len(body)) != resp.ContentLength {
        return "", fmt.Errorf("reading %s: got %d of %d bytes: %v", url, len(body), resp.ContentLength, readErr)
    }
    return string(body), nil
}

還需要處理title和content中包含的特殊格式。

并發的爬蟲:

package main

import (
    "fmt"
    "net/http"
    "os"
    "regexp"
    "strconv"
    "strings"
    //"os"
)

// mPage is an unbuffered channel of page numbers: getPage sends its page
// number on it and doWork receives one value per requested page.
var mPage = make(chan int)

// main asks for a first and last page number on standard input and crawls
// that range of index pages of the joke site concurrently.
func main() {
    fmt.Println("---段子爬蟲編程演示案例---")
    var start, end int

    fmt.Printf("輸入起始頁:")
    // Stop on unparsable input rather than crawling with a zero value.
    if _, err := fmt.Scan(&start); err != nil {
        fmt.Println("err:", err)
        return
    }
    fmt.Printf("輸入結(jié)束頁:")
    if _, err := fmt.Scan(&end); err != nil {
        fmt.Println("err:", err)
        return
    }

    doWork(start, end)
}

// doWork starts one getPage goroutine per page in [start, end] and then
// receives from mPage once per started goroutine, printing each page number
// as it arrives.
func doWork(start, end int) {
    fmt.Printf("正在爬去 %d - %d 頁的數(shù)據(jù)...\n", start, end)

    pending := 0
    for page := start; page <= end; page++ {
        go getPage(page)
        pending++
    }

    for ; pending > 0; pending-- {
        fmt.Printf("第【%d】頁 爬取完成\n", <-mPage)
    }
}

func getPage(page int) {
    var url string
    url = "http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=" + strconv.Itoa(page*50)
    url = "https://www.pengfue.com/index_" + strconv.Itoa(page) + ".html"
    //fmt.Println("url = ", url)
    fmt.Printf("開始爬去第【%d】頁:%s\n", page, url)
    res, err := httpGet(url)
    if err != nil {
        fmt.Println("err:", err)
        return
    }

    //<a  target="_blank">游泳圈</a>
    reg := regexp.MustCompile(`<h1 class="dp-b"><a href="(?s:(.*?))"`)
    if reg == nil {
        fmt.Println("regexp error.")
        return
    }
    urls := reg.FindAllStringSubmatch(res, -1)

    fileTitle := make([]string, 0)
    fileContent := make([]string, 0)
    fileUrl := make([]string, 0)
    // fmt.Println("urls:", urls)
    for k, v := range urls {
        fmt.Println("url:", k, v[1])
        res, err := httpGet(v[1])

        if err != nil {
            fmt.Println("err:", err)
            return
        }

        // 處理title
        reg := regexp.MustCompile(`<h1>(?s:(.*?))</h1>`)
        if reg == nil {
            fmt.Println("regexp error.")
            return
        }
        titles := reg.FindAllStringSubmatch(res, 1)
        var tempTitle string   //用于處理title
        var tempContent string //用于處理content
        for _, title := range titles {

            // 把內(nèi)容存儲到文件中
            tempTitle = title[1]
            tempTitle = strings.Replace(tempTitle, "\r", "", -1)
            tempTitle = strings.Replace(tempTitle, "\n", "", -1)
            tempTitle = strings.Replace(tempTitle, " ", "", -1)
            tempTitle = strings.Replace(tempTitle, "\t", "", -1)
            fmt.Println("title:", tempTitle) //this is title.
            fileTitle = append(fileTitle, tempTitle)
        }

        // 處理content
        regContent := regexp.MustCompile(`<div class="content-txt pt10">(?s:(.*?))<a id="prev"`)
        if regContent == nil {
            fmt.Println("regexp error.")
            return
        }
        contents := regContent.FindAllStringSubmatch(res, 1)
        for _, content := range contents {

            // 把內(nèi)容存儲到文件中

            tempContent = content[1]
            // tempContent = strings.Replace(tempContent, "\r", "", -1)
            tempContent = strings.Replace(tempContent, "\n", "", -1)
            // tempContent = strings.Replace(tempContent, " ", "", -1)
            tempContent = strings.Replace(tempContent, "\t", "", -1)
            fmt.Println("content:", tempContent) //this is content.
            fileContent = append(fileContent, tempContent)
        }
        // 處理url
        fileUrl = append(fileUrl, v[1])

    }

    //fmt.Println("fileTitle = ", fileTitle)
    //fmt.Println("fileContent = ", fileContent)

    //saveToFile(page, fileTitle, fileContent)
    saveToFileWithUrl(page, fileTitle, fileContent, fileUrl)
    // fmt.Println("res:", res)

    mPage <- page

}

// saveToFile writes the collected entries to "<page>.txt" in the current
// directory. Each entry is written as the title line, the content line and a
// separator line of '=' characters.
//
// fileTitle decides how many entries are written; an entry whose index has no
// element in fileContent gets an empty content line instead of causing an
// index-out-of-range panic. Failures are printed and end the call.
func saveToFile(page int, fileTitle, fileContent []string) {
    fileName := strconv.Itoa(page) + ".txt"
    f1, err1 := os.Create(fileName)
    if err1 != nil {
        fmt.Println("err1:", err1)
        return
    }
    defer func() {
        if err := f1.Close(); err != nil {
            fmt.Println("err:", err)
        }
    }()

    for i, title := range fileTitle {
        content := ""
        if i < len(fileContent) {
            content = fileContent[i]
        }
        entry := title + "\n" + content + "\n" +
            "===================================================================\n"
        if _, err := f1.WriteString(entry); err != nil {
            fmt.Println("err:", err)
            return
        }
    }
}
// saveToFileWithUrl writes the collected entries to "<page>.txt" in the
// current directory. Each entry is written as the title line, the content
// line, the URL line and a separator line of '=' characters.
//
// fileTitle decides how many entries are written; an entry whose index has no
// element in fileContent or fileUrl gets an empty line for the missing value
// instead of causing an index-out-of-range panic. Failures are printed and end
// the call.
func saveToFileWithUrl(page int, fileTitle, fileContent, fileUrl []string) {
    fileName := strconv.Itoa(page) + ".txt"
    f1, err1 := os.Create(fileName)
    if err1 != nil {
        fmt.Println("err1:", err1)
        return
    }
    defer func() {
        if err := f1.Close(); err != nil {
            fmt.Println("err:", err)
        }
    }()

    for i, title := range fileTitle {
        content, link := "", ""
        if i < len(fileContent) {
            content = fileContent[i]
        }
        if i < len(fileUrl) {
            link = fileUrl[i]
        }
        entry := title + "\n" + content + "\n" + link + "\n" +
            "===================================================================\n"
        if _, err := f1.WriteString(entry); err != nil {
            fmt.Println("err:", err)
            return
        }
    }
}

// httpGet performs an HTTP GET on url and returns the complete response body
// as a string.
//
// It returns a non-nil err when the request itself fails, or when the server
// announced a Content-Length and a different number of bytes was received. On
// error res is empty.
func httpGet(url string) (res string, err error) {
    resp, err := http.Get(url) // func Get(url string) (resp *Response, err error)
    if err != nil {
        return "", err
    }
    defer resp.Body.Close()

    var (
        body    []byte
        n       int
        readErr error
    )
    buff := make([]byte, 1024)
    // Keep whatever each Read delivered and stop at the first error. A read of
    // zero bytes with a nil error means "nothing happened yet", not end of
    // body, so it must not end the loop. Collecting the bytes in one slice also
    // avoids re-copying the whole result for every chunk, as string += did.
    for readErr == nil {
        n, readErr = resp.Body.Read(buff)
        body = append(body, buff[:n]...)
    }

    // The error that ends a complete body is io.EOF. The io package is not
    // imported by this program, so a body that was cut short is recognised by
    // comparing against the announced length (-1 means the length is unknown).
    if resp.ContentLength >= 0 && int64(len(body)) != resp.ContentLength {
        return "", fmt.Errorf("reading %s: got %d of %d bytes: %v", url, len(body), resp.ContentLength, readErr)
    }
    return string(body), nil
}

END.

最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
  • 序言:七十年代末,一起剝皮案震驚了整個濱河市,隨后出現的幾起案子,更是在濱河造成了極大的恐慌,老刑警劉巖,帶你破解...
    沈念sama閱讀 221,576評論 6 515
  • 序言:濱河連續(xù)發(fā)生了三起死亡事件,死亡現(xiàn)場離奇詭異冯挎,居然都是意外死亡底哥,警方通過查閱死者的電腦和手機,發(fā)現(xiàn)死者居然都...
    沈念sama閱讀 94,515評論 3 399
  • 文/潘曉璐 我一進(jìn)店門房官,熙熙樓的掌柜王于貴愁眉苦臉地迎上來趾徽,“玉大人,你說我怎么就攤上這事翰守》跄蹋” “怎么了?”我有些...
    開封第一講書人閱讀 168,017評論 0 360
  • 文/不壞的土叔 我叫張陵蜡峰,是天一觀的道長了袁。 經(jīng)常有香客問我朗恳,道長,這世上最難降的妖魔是什么载绿? 我笑而不...
    開封第一講書人閱讀 59,626評論 1 296
  • 正文 為了忘掉前任僻肖,我火速辦了婚禮,結(jié)果婚禮上卢鹦,老公的妹妹穿的比我還像新娘臀脏。我一直安慰自己,他們只是感情好冀自,可當(dāng)我...
    茶點故事閱讀 68,625評論 6 397
  • 文/花漫 我一把揭開白布揉稚。 她就那樣靜靜地躺著,像睡著了一般熬粗。 火紅的嫁衣襯著肌膚如雪搀玖。 梳的紋絲不亂的頭發(fā)上,一...
    開封第一講書人閱讀 52,255評論 1 308
  • 那天驻呐,我揣著相機與錄音灌诅,去河邊找鬼。 笑死含末,一個胖子當(dāng)著我的面吹牛猜拾,可吹牛的內(nèi)容都是我干的。 我是一名探鬼主播佣盒,決...
    沈念sama閱讀 40,825評論 3 421
  • 文/蒼蘭香墨 我猛地睜開眼挎袜,長吁一口氣:“原來是場噩夢啊……” “哼!你這毒婦竟也來了肥惭?” 一聲冷哼從身側(cè)響起盯仪,我...
    開封第一講書人閱讀 39,729評論 0 276
  • 序言:老撾萬榮一對情侶失蹤,失蹤者是張志新(化名)和其女友劉穎蜜葱,沒想到半個月后全景,有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體,經(jīng)...
    沈念sama閱讀 46,271評論 1 320
  • 正文 獨居荒郊野嶺守林人離奇死亡牵囤,尸身上長有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
    茶點故事閱讀 38,363評論 3 340
  • 正文 我和宋清朗相戀三年爸黄,在試婚紗的時候發(fā)現(xiàn)自己被綠了。 大學(xué)時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片奔浅。...
    茶點故事閱讀 40,498評論 1 352
  • 序言:一個原本活蹦亂跳的男人離奇死亡馆纳,死狀恐怖,靈堂內(nèi)的尸體忽然破棺而出汹桦,到底是詐尸還是另有隱情鲁驶,我是刑警寧澤,帶...
    沈念sama閱讀 36,183評論 5 350
  • 正文 年R本政府宣布舞骆,位于F島的核電站钥弯,受9級特大地震影響径荔,放射性物質(zhì)發(fā)生泄漏。R本人自食惡果不足惜脆霎,卻給世界環(huán)境...
    茶點故事閱讀 41,867評論 3 333
  • 文/蒙蒙 一总处、第九天 我趴在偏房一處隱蔽的房頂上張望。 院中可真熱鬧睛蛛,春花似錦鹦马、人聲如沸。這莊子的主人今日做“春日...
    開封第一講書人閱讀 32,338評論 0 24
  • 文/蒼蘭香墨 我抬頭看了看天上的太陽。三九已至客冈,卻和暖如春旭从,著一層夾襖步出監(jiān)牢的瞬間,已是汗流浹背场仲。 一陣腳步聲響...
    開封第一講書人閱讀 33,458評論 1 272
  • 我被黑心中介騙來泰國打工和悦, 沒想到剛下飛機就差點兒被人妖公主榨干…… 1. 我叫王不留,地道東北人渠缕。 一個月前我還...
    沈念sama閱讀 48,906評論 3 376
  • 正文 我出身青樓鸽素,卻偏偏與公主長得像,于是被迫代替她去往敵國和親褐健。 傳聞我的和親對象是個殘疾皇子付鹿,可洞房花燭夜當(dāng)晚...
    茶點故事閱讀 45,507評論 2 359