增加HDU题目爬虫 v0.5
This commit is contained in:
parent
b8b844336a
commit
c2a3185356
|
@ -1,18 +1,19 @@
|
||||||
package top.hcode.hoj.remoteJudge.task.Impl;
|
package top.hcode.hoj.remoteJudge.task.Impl;
|
||||||
|
|
||||||
import cn.hutool.core.codec.Base64;
|
|
||||||
import cn.hutool.core.map.MapUtil;
|
import cn.hutool.core.map.MapUtil;
|
||||||
import cn.hutool.core.util.ReUtil;
|
import cn.hutool.core.util.ReUtil;
|
||||||
import cn.hutool.json.JSONUtil;
|
|
||||||
import jdk.nashorn.internal.runtime.regexp.RegExp;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.commons.lang.StringEscapeUtils;
|
||||||
import org.jsoup.Connection;
|
import org.jsoup.Connection;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.helper.Validate;
|
import org.jsoup.helper.Validate;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Entities;
|
||||||
|
import top.hcode.hoj.pojo.entity.Problem;
|
||||||
import top.hcode.hoj.remoteJudge.task.RemoteJudgeStrategy;
|
import top.hcode.hoj.remoteJudge.task.RemoteJudgeStrategy;
|
||||||
import top.hcode.hoj.util.Constants;
|
import top.hcode.hoj.util.Constants;
|
||||||
import top.hcode.hoj.util.JsoupUtils;
|
import top.hcode.hoj.util.JsoupUtils;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
|
@ -20,12 +21,14 @@ import java.util.regex.Pattern;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class HduJudge implements RemoteJudgeStrategy {
|
public class HduJudge implements RemoteJudgeStrategy {
|
||||||
public static final String host = "http://acm.hdu.edu.cn";
|
public static final String JUDGE_NAME = "HDU";
|
||||||
public static final String loginUrl = "/userloginex.php?action=login";
|
public static final String HOST = "http://acm.hdu.edu.cn";
|
||||||
public static final String submitUrl = "/submit.php?action=submit";
|
public static final String LOGIN_URL = "/userloginex.php?action=login";
|
||||||
public static final String statusUrl = "/status.php?user=%s&pid=%d";
|
public static final String SUBMIT_URL = "/submit.php?action=submit";
|
||||||
public static final String queryUrl = "/status.php?first=%d";
|
public static final String STATUS_URL = "/status.php?user=%s&pid=%d";
|
||||||
public static final String errorUrl = "/viewerror.php?rid=%d";
|
public static final String QUERY_URL = "/status.php?first=%d";
|
||||||
|
public static final String ERROR_URL = "/viewerror.php?rid=%d";
|
||||||
|
public static final String PROBLEM_URL = "/showproblem.php?pid=%s";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param problemId 提交的题目id
|
* @param problemId 提交的题目id
|
||||||
|
@ -39,7 +42,7 @@ public class HduJudge implements RemoteJudgeStrategy {
|
||||||
return -1L;
|
return -1L;
|
||||||
}
|
}
|
||||||
Map<String, String> loginCookie = getLoginCookie();
|
Map<String, String> loginCookie = getLoginCookie();
|
||||||
Connection connection = JsoupUtils.getConnectionFromUrl(host + submitUrl, null, loginCookie);
|
Connection connection = JsoupUtils.getConnectionFromUrl(HOST + SUBMIT_URL, null, loginCookie);
|
||||||
Connection.Response response = JsoupUtils.postResponse(connection, MapUtil
|
Connection.Response response = JsoupUtils.postResponse(connection, MapUtil
|
||||||
.builder(new HashMap<String, String>())
|
.builder(new HashMap<String, String>())
|
||||||
.put("check", "0")
|
.put("check", "0")
|
||||||
|
@ -58,7 +61,7 @@ public class HduJudge implements RemoteJudgeStrategy {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<String, Object> result(Long submitId) throws Exception {
|
public Map<String, Object> result(Long submitId) throws Exception {
|
||||||
String url = host + String.format(queryUrl, submitId);
|
String url = HOST + String.format(QUERY_URL, submitId);
|
||||||
Connection connection = JsoupUtils.getConnectionFromUrl(url, null, null);
|
Connection connection = JsoupUtils.getConnectionFromUrl(url, null, null);
|
||||||
Connection.Response response = JsoupUtils.getResponse(connection, null);
|
Connection.Response response = JsoupUtils.getResponse(connection, null);
|
||||||
// 1提交时间 2结果 3执行时间 4执行空间 5代码长度
|
// 1提交时间 2结果 3执行时间 4执行空间 5代码长度
|
||||||
|
@ -83,7 +86,7 @@ public class HduJudge implements RemoteJudgeStrategy {
|
||||||
result.put("memory", Integer.parseInt(executionMemory));
|
result.put("memory", Integer.parseInt(executionMemory));
|
||||||
// 如果CE了,则还需要获得错误信息
|
// 如果CE了,则还需要获得错误信息
|
||||||
if (statusType == Constants.Judge.STATUS_COMPILE_ERROR) {
|
if (statusType == Constants.Judge.STATUS_COMPILE_ERROR) {
|
||||||
connection.url(host + String.format(errorUrl, submitId));
|
connection.url(HOST + String.format(ERROR_URL, submitId));
|
||||||
response = JsoupUtils.getResponse(connection, null);
|
response = JsoupUtils.getResponse(connection, null);
|
||||||
String compilationErrorInfo = ReUtil.get("(<pre>[\\s\\S]*?</pre>)", response.body(), 1);
|
String compilationErrorInfo = ReUtil.get("(<pre>[\\s\\S]*?</pre>)", response.body(), 1);
|
||||||
result.put("CEInfo", compilationErrorInfo);
|
result.put("CEInfo", compilationErrorInfo);
|
||||||
|
@ -91,9 +94,39 @@ public class HduJudge implements RemoteJudgeStrategy {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Problem getProblemInfo(String problemId) throws Exception {
|
||||||
|
// 验证题号是否符合规范
|
||||||
|
Validate.isTrue(problemId.matches("[1-9]\\d*"));
|
||||||
|
Problem info = new Problem();
|
||||||
|
String url = HOST + String.format(PROBLEM_URL, problemId);
|
||||||
|
System.out.println(url);
|
||||||
|
Connection connection = JsoupUtils.getConnectionFromUrl(url, null, null);
|
||||||
|
Document document = JsoupUtils.getDocument(connection, null);
|
||||||
|
String html = document.html();
|
||||||
|
System.out.println(html);
|
||||||
|
info.setTitle(ReUtil.get("color:#1A5CC8\">([\\s\\S]*?)</h1>", html, 1).trim());
|
||||||
|
info.setTimeLimit(Integer.parseInt(ReUtil.get("(\\d*) MS", html, 1)));
|
||||||
|
info.setMemoryLimit(Integer.parseInt(ReUtil.get("/(\\d*) K", html, 1)));
|
||||||
|
info.setDescription(ReUtil.get(">Problem Description</div>\\s+<.*?>(.*?)<br></div>", html, 1));
|
||||||
|
info.setInput(ReUtil.get(">Input</div>.*?<.*?>(.*?)<br></div>", html, 1));
|
||||||
|
info.setOutput(ReUtil.get(">Output</div>.*?<.*?>(.*?)<br></div>", html, 1));
|
||||||
|
StringBuilder sb = new StringBuilder("<input>");
|
||||||
|
sb.append(ReUtil.get(">Sample Input</div><div .*?,monospace;\">([\\s\\S]*?)</div></pre>", html, 1));
|
||||||
|
sb.append("</input><output>");
|
||||||
|
// TODO 筛选output和hint
|
||||||
|
sb.append(ReUtil.get(">Sample Output</div><.*?monospace;\">(.*)(<div style=)*?", html, 1)).append("</output>");
|
||||||
|
info.setExamples(sb.toString());
|
||||||
|
info.setHint(ReUtil.get("<i>Hint</i></div>([\\s\\S]*?)<br><[^<>]*?panel_title[^<>]*?>", html, 1));
|
||||||
|
info.setIsRemote(true);
|
||||||
|
info.setSource(JUDGE_NAME + "-" + problemId);
|
||||||
|
info.setType(0);
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<String, String> getLoginCookie() throws Exception {
|
public Map<String, String> getLoginCookie() throws Exception {
|
||||||
Connection connection = JsoupUtils.getConnectionFromUrl(host + loginUrl, null, null);
|
Connection connection = JsoupUtils.getConnectionFromUrl(HOST + LOGIN_URL, null, null);
|
||||||
Connection.Response response = JsoupUtils.postResponse(connection, MapUtil
|
Connection.Response response = JsoupUtils.postResponse(connection, MapUtil
|
||||||
.builder(new HashMap<String, String>())
|
.builder(new HashMap<String, String>())
|
||||||
// TODO 添加账号密码 暂时写死测试,后续将在队列中获取空闲账号
|
// TODO 添加账号密码 暂时写死测试,后续将在队列中获取空闲账号
|
||||||
|
@ -125,9 +158,10 @@ public class HduJudge implements RemoteJudgeStrategy {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Long getMaxRunId(Connection connection, String userName, Long problemId) throws Exception {
|
public Long getMaxRunId(Connection connection, String userName, Long problemId) throws Exception {
|
||||||
String url = String.format(statusUrl, userName, problemId);
|
String url = String.format(STATUS_URL, userName, problemId);
|
||||||
connection.url(host + url);
|
connection.url(HOST + url);
|
||||||
Connection.Response response = JsoupUtils.getResponse(connection, null);
|
Connection.Response response = JsoupUtils.getResponse(connection, null);
|
||||||
Matcher matcher = Pattern.compile("<td height=22px>(\\d+)").matcher(response.body());
|
Matcher matcher = Pattern.compile("<td height=22px>(\\d+)").matcher(response.body());
|
||||||
return matcher.find() ? Long.parseLong(matcher.group(1)) : -1L;
|
return matcher.find() ? Long.parseLong(matcher.group(1)) : -1L;
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
package top.hcode.hoj.remoteJudge.task;
|
package top.hcode.hoj.remoteJudge.task;
|
||||||
|
|
||||||
|
import top.hcode.hoj.pojo.entity.Problem;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
@ -25,4 +27,11 @@ public interface RemoteJudgeStrategy {
|
||||||
|
|
||||||
String getLanguage(String language);
|
String getLanguage(String language);
|
||||||
|
|
||||||
|
/**
* Retrieves the information of the given problem from the remote judge.
|
||||||
|
* @param problemId problem id; a String because some judges use ids that are not purely numeric
|
||||||
|
* @return the Problem object
|
||||||
|
* @throws Exception if the implementation fails to obtain the problem
|
||||||
|
*/
|
||||||
|
Problem getProblemInfo(String problemId) throws Exception;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,7 @@ import cn.hutool.json.JSONObject;
|
||||||
import org.jsoup.Connection;
|
import org.jsoup.Connection;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Entities;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -47,6 +48,16 @@ public class JsoupUtils {
|
||||||
}
|
}
|
||||||
return connection.method(Connection.Method.GET).execute();
|
return connection.method(Connection.Method.GET).execute();
|
||||||
}
|
}
|
||||||
|
public static Document getDocument(Connection connection, Map<String, String> getData) throws IOException {
|
||||||
|
//添加参数
|
||||||
|
if (getData != null) {
|
||||||
|
connection.data(getData);
|
||||||
|
}
|
||||||
|
Document document = connection.get();
|
||||||
|
document.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
|
||||||
|
document.outputSettings().prettyPrint(false);
|
||||||
|
return document;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package top.hcode.hoj.remoteJudge.task.Impl;
|
package top.hcode.hoj.remoteJudge.task.Impl;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
import top.hcode.hoj.pojo.entity.Problem;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -14,8 +15,11 @@ class HduJudgeTest {
|
||||||
HduJudge hduJudge = new HduJudge();
|
HduJudge hduJudge = new HduJudge();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Map<String, Object> submit = hduJudge.result(35329033L);
|
// 有hint
|
||||||
System.out.println(submit);
|
Problem problemInfo = hduJudge.getProblemInfo("1425");
|
||||||
|
// 无hint
|
||||||
|
// Problem problemInfo = hduJudge.getProblemInfo("1090");
|
||||||
|
System.out.println(problemInfo);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue