服务器之家

服务器之家 > 正文

java 实现通过 post 方式提交json参数操作

时间:2020-09-13 00:08     来源/作者:joexk

由于所爬取的网站需要验证码,通过浏览器的开发者工具【F12】以及在线 HTTP POST/GET 接口测试工具(http://coolaf.com/)发现:请求时加上相应的请求头(header)信息,即可跳过验证码校验。

而且该网站只接受post请求,对提交的参数也只接受json格式,否则请求失败。

现将通过 post 方式提交json参数的方法记录如下:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.List;
 
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
 
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
 
/**
 * <p>@PostJsonParamsTest.java</p>
 * @version 1.0
 * @author zxk
 * @Date 2018-3-3
 */
public class PostJsonParamsTest {
 
  // 超时时间
  private static final int RUN_TIME =10000;
 
  // 爬取初始页数
  private String page;
 
  public static void main(String[] args) throws Exception {
    PostJsonParamsTest crawl = new PostJsonParamsTest();
 
    // 请求的url地址
    String url ="http://www.gzcredit.gov.cn/Service/CreditService.asmx/searchOrgWithPage";
    // 设置起始访问页码
    crawl.setPage("1");
    String isStop = "";
 
    // 设置请求
    HttpRequestBase request = null;
    request = new HttpPost(url);
 
    try {
      // 设置config
      RequestConfig requestConfig = RequestConfig.custom()
            .setSocketTimeout(RUN_TIME)
            .setConnectTimeout(RUN_TIME)
            .setConnectionRequestTimeout(RUN_TIME)
            .build();
      request.setConfig(requestConfig);
 
      // json 格式的 post 参数
      String postParams ="{\"condition\":{\"qymc\":\"%%%%\",\"cydw\":\"\"},\"pageNo\":"+crawl.getPage()+",\"pageSize\":100,count:2709846}";
      System.out.println(postParams);
      HttpEntity httpEntity = new StringEntity(postParams);
      ((HttpPost) request).setEntity(httpEntity);
 
      // 添加请求头,可以绕过验证码
      request.addHeader("Accept","application/json, text/javascript, */*");
      request.addHeader("Accept-Encoding","gzip, deflate");
      request.addHeader("Accept-Language", "zh-CN,zh;q=0.8");
      request.addHeader("Connection", "keep-alive");
      request.addHeader("Host", "www.gzcredit.gov.cn");
      request.addHeader("Content-Type", "application/json; charset=UTF-8");
 
      URIBuilder builder = new URIBuilder(url);      
      URI uri = builder.build();
      uri = new URI(URLDecoder.decode(uri.toString(), "UTF-8"));
      request.setURI(uri);
 
      while(!isStop.equals("停止")||isStop.equals("重跑")){
        isStop = crawl.crawlList(request);
        if(isStop.equals("爬取")){
          crawl.setPage(String.valueOf(Integer.parseInt(crawl.getPage())+1));
        }
 
        // if("2713".equals(crawl.getPage())) break;
        if("2".equals(crawl.getPage())){
          break;
        }
      }
    } catch (NumberFormatException e) {
      e.printStackTrace();
      throw new NumberFormatException("数字格式错误");
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
      throw new UnsupportedEncodingException("不支持的编码集");
    }
  }
  /**
   * 爬取搜索列表
   * @param page
   * @return
   */
  private String crawlList(HttpRequestBase request){
    int statusCode = 0;
 
    // 下面两种方式都可以用来创建客户端连接,相当于打开了一个浏览器
    CloseableHttpClient httpClient = HttpClients.createDefault();
    // HttpClient httpClient = HttpClientBuilder.create().build();
 
    HttpEntity httpEntity = null;
    HttpResponse response = null;
    try {     
      try {       
        response = httpClient.execute(request);
      } catch (Exception e){
        e.printStackTrace();
        EntityUtils.consumeQuietly(httpEntity);
        return "重跑";
      }
 
      //打印状态
      statusCode =response.getStatusLine().getStatusCode();
      if(statusCode!=200){
        EntityUtils.consumeQuietly(httpEntity);
        return "重跑";
      }
      //实体
      httpEntity = response.getEntity();
      String searchListStr = EntityUtils.toString(httpEntity,"GBK").replaceAll("\\\\米", "米");
      String allData = (String) JSONObject.parseObject(searchListStr).get("d");
      // 字符串值中间含双引号的替换处理
      String s = allData.replaceAll("\\{\"","{'")
          .replaceAll("\":\"", "':'")
          .replaceAll("\",\"", "','")
          .replaceAll("\":", "':")
          .replaceAll(",\"", ",'")
          .replaceAll("\"\\}", "'}")
          .replaceAll("\"", "")
          .replaceAll("'", "\"")
          .replaceAll("<br />", "")       
          .replaceAll("\t", "")
          .replaceAll("\\\\", "?");
      JSONObject jsonData = JSONObject.parseObject(s);
      JSONArray jsonContent = jsonData.getJSONArray("orgList");
 
      searchListStr = null;
      allData = null;
      s = null;
 
      if (jsonContent==null || jsonContent.size()<1) {
        return "重跑";
      }
      System.out.println(jsonContent.toJSONString());
      return "爬取";
    } catch (Exception e) {
      e.printStackTrace();
      return "重跑";
    } finally{
      EntityUtils.consumeQuietly(httpEntity);
    }
  }
 
  private String getPage() {
    return page;
  }
 
  private void setPage(String page) {
    this.page = page;
  }
 
}

补充知识:JAVA利用HttpClient发送post请求,将请求数据放到body里

我就废话不多说了,大家还是直接看代码吧~

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/**
 * post请求 ,请求数据放到body里
 * @param url  请求地址
 * @param bodyData 参数
 * @author wangyj
 * @date 2019年4月20日
 */
public static String doPostBodyData(String url, String bodyData) throws Exception{
  String result = "";
  CloseableHttpClient httpClient = null;
  CloseableHttpResponse response = null;
  try {
    HttpPost httpPost = getHttpPost(url, null); // 请求地址
    httpPost.setEntity(new StringEntity(bodyData, Encoding));
    httpClient = getHttpClient();
    // 得到返回的response
    response = httpClient.execute(httpPost);
    HttpEntity entity = response.getEntity();
    result = getResult(entity, Encoding);
  } catch (Exception e) {
    throw e;
  } finally {
    // 关闭httpClient
    if (null != httpClient) {
      httpClient.close();
    }
    // 关闭response
    if (null != response) {
      EntityUtils.consume(response.getEntity()); // 会自动释放连接
      response.close();
    }
  }
  return result;
}

以上这篇java 实现通过 post 方式提交json参数操作就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持服务器之家。

原文链接:https://blog.csdn.net/zhouxukun123/article/details/79441031

标签:

相关文章

热门资讯

2020微信伤感网名听哭了 让对方看到心疼的伤感网名大全
2020微信伤感网名听哭了 让对方看到心疼的伤感网名大全 2019-12-26
Intellij idea2020永久破解,亲测可用!!!
Intellij idea2020永久破解,亲测可用!!! 2020-07-29
歪歪漫画vip账号共享2020_yy漫画免费账号密码共享
歪歪漫画vip账号共享2020_yy漫画免费账号密码共享 2020-04-07
电视剧《琉璃》全集在线观看 琉璃美人煞1-59集免费观看地址
电视剧《琉璃》全集在线观看 琉璃美人煞1-59集免费观看地址 2020-08-12
最新idea2020注册码永久激活(激活到2100年)
最新idea2020注册码永久激活(激活到2100年) 2020-07-29
返回顶部