go语言如何实现python那样在前一次请求上保留cookies去请求

package main

import (
    "fmt"
    "io/ioutil"
    "net/http"
    "net/http/cookiejar"
    "net/url"
    "io"
    "log"
    "regexp"
    "strings"
)

var cookies_lagou []*http.Cookie

const (
    login_url_lagou           string = "https://passport.lagou.com/login/login.html"

    post_login_info_url_lagou string = "https://passport.lagou.com/login/login.json"

    username_lagou            string = "xxxxxxxxx"
    password_lagou            string = "4525674692ac06e619cdb3f1b4b65b08"
)

func getToken(contents io.Reader)  (string,string){

    data, _ := ioutil.ReadAll(contents)
    regCode := regexp.MustCompile(`X_Anti_Forge_Code\s+\=(.+?);`)
    if regCode == nil {
        log.Fatal("解析Code出错...")
    }

    //提取关键信息
    code := regCode.FindAllStringSubmatch(string(data), -1)[0][1]

    regToken := regexp.MustCompile(`X_Anti_Forge_Token\s+\=(.+?);`)
    if regToken == nil {
        fmt.Println("MustCompile err")
    }

    //提取关键信息
    token := regToken.FindAllStringSubmatch(string(data), -1)[0][1]

    return token,code
}

func login_lagou() {
    //获取登陆界面的cookie
    jar, _ := cookiejar.New(nil)
    client := &http.Client{
        Jar: jar,
    }
    req, _ := http.NewRequest("GET", login_url_lagou, nil)
    res, _ := client.Do(req)
    for k, v := range res.Cookies() {
        fmt.Printf("%v=%v\n",k,v)
    }
    token,code := getToken(res.Body)
    //post数据
    postValues := url.Values{}
    postValues.Add("isValidate", "true")
    postValues.Add("username", username_lagou)
    postValues.Add("password", password_lagou)
    postValues.Add("request_form_verifyCode", "")
    postValues.Add("submit", "")
    body := ioutil.NopCloser(strings.NewReader(postValues.Encode())) //把form数据编下码
    requ, _ := http.NewRequest("POST", post_login_info_url_lagou, body)

    requ.Header.Set("X-Requested-With","XMLHttpRequest")
    requ.Header.Set("Content-Type","application/x-www-form-urlencoded; charset=UTF-8")
    requ.Header.Set("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36")
    requ.Header.Set("Host","passport.lagou.com")
    requ.Header.Set("Origin","https://passport.lagou.com")
    requ.Header.Add("X-Anit-Forge-Token",token)
    requ.Header.Add("X-Anit-Forge-Code",code)
    requ.Header.Set("Accept","application/json, text/javascript, */*; q=0.01")
    requ.Header.Set("Connection","keep-alive")
    requ.Header.Set("Accept-Language","en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7")
    requ.Header.Set("Accept-Encoding","gzip, deflate, br")
    //requ.Header.Set("Content-Length","111")
    //requ.Header.Set("Cookie","JSESSIONID=ABAAABAAAHAAAFD632D7028F2AC7466F2DAA44C1BE15A01; user_trace_token=20180418184827-7aec3c11-930a-494a-9a73-c83419b1450a; X_HTTP_TOKEN=7ab0dd4e12bf94d2d4c0c3370e1c6341; _ga=GA1.2.837634606.1524048588; _gid=GA1.2.1349141044.1524048588; _ga=GA1.3.837634606.1524048588; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1523518887,1524030389,1524033687,1524045972; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1524048588; LGSID=20180418184827-06c3d493-42f6-11e8-8a1c-525400f775ce; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=http%3A%2F%2Fpassport.lagou.com%2Flogin%2Flogin.html; LGUID=20180418184827-06c3d687-42f6-11e8-8a1c-525400f775ce; LGRID=20180418185212-8ca212e6-42f6-11e8-b8a8-5254005c3644; TG-TRACK-CODE=undefined")

    for _, v := range res.Cookies() {
        requ.AddCookie(v)
    }

    res,_ = client.Do(requ)
    //cookies_lagou = res.Cookies()
    data, _ := ioutil.ReadAll(res.Body)
    res.Body.Close()
    fmt.Println(string(data))
}

func main() {
    login_lagou()
}

以上代码运行结果为

0=JSESSIONID=ABAAABAAAHAAAFD5DA9395672E82BF6CEDCAE3CA350ED64; Path=/; HttpOnly
1=user_trace_token=20180418221336-b03f8889-f175-46cf-b986-af55a8ef3e33; Path=/; Domain=lagou.com; Max-Age=31536000
{"content":{"rows":[]},"message":"非法请求,参数错误","state":299}

Process finished with exit code 0

我参考的是这篇文章python -- 拉勾网爬虫模拟登录 在python中,有requests包,requests.session中自动保留了前面的cookies,session信息, 但是用golang实现的时候就不行,也设置了cookies,把login.html的cookies遍历添加了,再去请求登录还是不行。 在实际的登录中,cooike好像多了一些东西,不知道是不是这个的缘故。 浏览器登录Header信息:

Accept: application/json, text/javascript, */*; q=0.01
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7
Connection: keep-alive
Content-Length: 111
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Cookie: user_trace_token=20180418184827-7aec3c11-930a-494a-9a73-c83419b1450a; _ga=GA1.2.837634606.1524048588; _gid=GA1.2.1349141044.1524048588; _ga=GA1.3.837634606.1524048588; LGUID=20180418184827-06c3d687-42f6-11e8-8a1c-525400f775ce; index_location_city=%E6%88%90%E9%83%BD; gate_login_token=e27e32a29e46e476b94f06e8e0a5b6d149dbb2a5efd97c51; LGSID=20180418212135-6b42e55b-430b-11e8-b8a9-5254005c3644; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1524030389,1524033687,1524045972,1524057776; JSESSIONID=ABAAABAAAHAAAFD5346FCED99BA9CA2991F52D85C9D0982; X_HTTP_TOKEN=7ab0dd4e12bf94d2d4c0c3370e1c6341; TG-TRACK-CODE=undefined; _gat=1; LGRID=20180418213214-e7ac4b8b-430c-11e8-8a7e-525400f775ce; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1524058415
Host: passport.lagou.com
Origin: https://passport.lagou.com
Referer: https://passport.lagou.com/login/login.html
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36
X-Anit-Forge-Code: 21248807
X-Anit-Forge-Token: cc96fefa-0bad-4a06-8674-48fc1b42e125
X-Requested-With: XMLHttpRequest

请问各位前辈,是我的代码有问题吗?自己已经百度,google了两天,各种折腾,甚至把python代码拉下来运行,但是可以模拟登陆,golang就不行,实在没办法,所以来论坛求助各位前辈,求前辈们指点,感激不尽。

已邀请:

tkk

赞同来自: huhuyou2

不是有个cookieJar吗

Xargin

赞同来自:

先获取到 cookie, 之后的请求发去 httpbin.org/post 看看返回的 diff 吧

只能自己调了

pathbox - https://pathbox.github.io/

赞同来自:

  1. 你这个应该是想要登入成功,然后拿到cookie值.拿到cookie值之后再用于接下来的爬虫

  2. 返回的信息是告诉你登入失败了,和cookie包的使用没有关系。为什么会登入失败呢?代码应该没问题,问题出在了账号或密码错误了, 然而 你输入的账号已经 md5 拼接得到的password应该是正确的

  3. 你没有阅读 Python session 这个包的源码,不明白它到底做了哪些事情。但是,可以稍微猜一下(我也没有读过其源码),无疑是获取session 或cookie 信息,操作 request.Header

4.requ.Header.Set("Referer", "https://passport.lagou.com/login/login.html") 加上这一句,似乎没有报错拿到了返回值

swordlet

赞同来自:

可能是服务端,对输出进行gzip编码了,要解压,不然是乱码。浏览器是会自动解压的。

import "compress/gzip"
defer resp.Body.Close()

    var reader io.ReadCloser
    switch resp.Header.Get("Content-Encoding") {
    case "gzip":
        reader, err = gzip.NewReader(resp.Body)
        defer reader.Close()
    default:
        reader = resp.Body
    }
body, err = ioutil.ReadAll(reader)

huhuyou2 - fish

赞同来自:

https://segmentfault.com/q/1010000010339661

我之前提的一个问题,不知道有用,你参考一下

要回复问题请先登录注册