获取指标说明的能力

This commit is contained in:
2025-05-21 15:51:44 +08:00
parent 06c351a956
commit 1f329e3b2a
131 changed files with 4461 additions and 3126 deletions

View File

@@ -27,7 +27,7 @@ class EmoneyAutoApplicationTests {
@Test
void contextLoads() {
EmoneyClient.loginWithAnonymous();
EmoneyClient.relogin();
CandleStickWithIndex_Request request = new CandleStickWithIndex_Request();

View File

@@ -0,0 +1,285 @@
package quant.rich;
import java.io.IOException;
import java.io.Serializable;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.Value;
import org.junit.jupiter.api.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import jodd.jerry.Jerry;
import lombok.extern.slf4j.Slf4j;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
import quant.rich.emoney.EmoneyAutoApplication;
import quant.rich.emoney.client.EmoneyClient;
import quant.rich.emoney.client.OkHttpClientProvider;
import quant.rich.emoney.entity.config.EmoneyRequestConfig;
import quant.rich.emoney.pojo.dto.NonParamsIndexDetail;
import quant.rich.emoney.pojo.dto.NonParamsIndexDetail.NonParamsIndexDetailData;
/**
 * Scrapes index descriptions from the emoney "note" web page and stores them as local JSON files.
 *
 * <p>The flow implemented by {@link #test()} is:
 * <ol>
 *   <li>request the note page and collect the URLs of every {@code <script src>} it references;</li>
 *   <li>download each script and regex-match JavaScript array literals that look like index details;</li>
 *   <li>turn each literal into JSON via {@code JSON.stringify} and map matching objects to
 *       {@link NonParamsIndexDetail};</li>
 *   <li>download every referenced image, inline it as a base64 data URI, and write one JSON file
 *       per index into {@code ./conf/extra}.</li>
 * </ol>
 *
 * <p>NOTE(review): {@code @RunWith} is a JUnit 4 construct while {@code @Test} here comes from
 * JUnit Jupiter — confirm which engine is intended to run this class.
 */
@SpringBootTest
@ContextConfiguration(classes = EmoneyAutoApplication.class)
@RunWith(SpringJUnit4ClassRunner.class)
@Slf4j
public class EmoneyIndexScraper {

    private static final ObjectMapper MAPPER = new ObjectMapper();

    /**
     * Matches a JavaScript array literal of index-detail objects, i.e. objects made of
     * {@code id}, {@code data} (with {@code title}/{@code items}/{@code image}), {@code name}
     * and {@code nameCode} members. Compiled once because it is expensive and immutable.
     */
    private static final Pattern INDEX_DETAIL_PATTERN = Pattern.compile("\\[(\\{(id:\\d+,?|data:\\[\\{(title:\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*?\",?|items:\\[(\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*?\",?)+],?|image:\".*?\",?)+\\}+\\],?|name:\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*?\",?|nameCode:\"\\d+\",?)+\\},?)+\\]", Pattern.DOTALL);

    /** A nameCode is only accepted when it consists of digits. */
    private static final Pattern NUMERIC_PATTERN = Pattern.compile("^\\d+$");

    /** Directory that receives the generated {@code nonParamsIndexDetail.<nameCode>.json} files. */
    private static final Path OUTPUT_DIR = Path.of("./conf/extra");

    @Autowired
    EmoneyRequestConfig emoneyRequestConfig;

    static {
        MAPPER.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
    }

    /**
     * Builds the URL of the note page for one index.
     *
     * @param indexCode index code; its {@code toString()} value is used as the {@code name} parameter
     * @return the page URL, with the configured authorization value appended as {@code token}
     */
    public String buildUrl(Serializable indexCode) {
        return "https://appstatic.emoney.cn/html/emapp/stock/note/?name="
                + indexCode.toString()
                + "&emoneyScaleType=0&emoneyLandMode=0&token="
                + emoneyRequestConfig.getAuthorization();
    }

    /**
     * Runs the whole scrape: page, scripts, index details, images, JSON files.
     * Failures of individual scripts, images or files are logged and skipped so that
     * the remaining items are still processed.
     */
    @Test
    void test() {
        EmoneyClient.relogin();
        String url = buildUrl(10002800);
        OkHttpClient client = OkHttpClientProvider.getInstance();

        // 1. Load the page and collect its script URLs.
        List<String> scripts = fetchScriptUrls(client, url);
        if (scripts.isEmpty()) {
            log.error("未能获取基本页面内脚本链接");
            return;
        }

        // 2. Load the scripts and look for index descriptions embedded in the JS.
        List<String> matchGroups = collectJsonCandidates(client, url, scripts);

        // 3. Convert every JSON string into index details.
        List<NonParamsIndexDetail> valid = parseDetails(matchGroups);
        if (valid.isEmpty()) {
            log.warn("No index detail could be extracted from {} script(s)", scripts.size());
            return;
        }

        // 4. Inline the images and store each detail locally.
        for (NonParamsIndexDetail detail : valid) {
            String nameCode = detail.getNameCode();
            // Only numeric codes are legal; the code also becomes part of the file name.
            if (nameCode == null || !NUMERIC_PATTERN.matcher(nameCode).matches()) {
                continue;
            }
            inlineImages(client, url, detail);
            saveDetail(detail);
        }
    }

    /**
     * Requests the note page and returns the absolute URLs of all scripts it references.
     *
     * @return the script URLs; empty when the request failed or the page has no scripts
     */
    private List<String> fetchScriptUrls(OkHttpClient client, String url) {
        // 1.1 Headers are added in this fixed order.
        // NOTE(review): Android WebView normally sends "X-Requested-With"; "X-Request-With"
        // looks like a typo but is kept as-is — confirm against a captured request.
        // NOTE(review): stock OkHttp does not transparently decompress a response when
        // Accept-Encoding is set by the caller — verify the provided client handles gzip.
        Request request = new Request.Builder()
                .url(url)
                .header("Host", "appstatic.emoney.cn")
                .header("Connection", "keep-alive")
                .header("Upgrade-Insecure-Requests", "1")
                .header("User-Agent", emoneyRequestConfig.getWebviewUserAgent())
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
                .header("X-Request-With", "cn.emoney.emstock")
                .header("Sec-Fetch-Site", "none")
                .header("Sec-Fetch-Mode", "navigate")
                .header("Sec-Fetch-User", "?1")
                .header("Sec-Fetch-Dest", "document")
                .header("Accept-Encoding", "gzip, deflate")
                .header("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7")
                .build();

        // 1.2 Execute the request.
        List<String> scripts = new ArrayList<>();
        try (Response response = client.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                log.error("1.2 请求指标说明基本页面失败response.isSuccessful() is false");
                return scripts;
            }
            String responseBody = response.body().string();
            Jerry jerryDoc = Jerry.of(responseBody);
            jerryDoc.s("script[src]").forEach(el -> {
                String absoluteURI = resolveUrl(url, el.attr("src"));
                log.info("script uri: {}", absoluteURI);
                if (absoluteURI != null) {
                    scripts.add(absoluteURI);
                }
            });
        } catch (IOException e) {
            log.error("1.2 请求指标说明基本页面失败IOException", e);
        }
        return scripts;
    }

    /**
     * Downloads every script and returns the JSON form of each array literal that matches
     * {@link #INDEX_DETAIL_PATTERN}. A script that cannot be loaded is skipped.
     */
    private List<String> collectJsonCandidates(OkHttpClient client, String pageUrl, List<String> scripts) {
        List<String> matchGroups = new ArrayList<>();
        // The builder is reused; only the URL changes between requests.
        Request.Builder scriptBuilder = new Request.Builder()
                .header("Host", "appstatic.emoney.cn")
                .header("Connection", "keep-alive")
                .header("User-Agent", emoneyRequestConfig.getWebviewUserAgent())
                .header("Accept", "*/*")
                .header("X-Request-With", "cn.emoney.emstock")
                .header("Sec-Fetch-Site", "same-origin")
                .header("Sec-Fetch-Mode", "no-cors")
                .header("Sec-Fetch-Dest", "script")
                .header("Accept-Encoding", "gzip, deflate")
                .header("Referer", pageUrl)
                .header("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7");
        for (String scriptUrl : scripts) {
            Request scriptRequest = scriptBuilder.url(scriptUrl).build();
            try (Response response = client.newCall(scriptRequest).execute()) {
                if (!response.isSuccessful()) {
                    log.warn("2. 读取页面内 script 请求失败, response.isSuccessful() is false, scriptUrl: {}", scriptUrl);
                    // One unavailable script must not prevent the others from being inspected.
                    continue;
                }
                String responseBody = response.body().string();
                Matcher m = INDEX_DETAIL_PATTERN.matcher(responseBody);
                while (m.find()) {
                    stringifyJsArray(m.group()).ifPresent(matchGroups::add);
                }
            } catch (IOException e) {
                log.warn("2. 读取页面内 script 请求失败, IOException, scriptUrl: {}", scriptUrl, e);
            }
        }
        return matchGroups;
    }

    /**
     * Parses each JSON string and maps every array element accepted by
     * {@link #matches(JsonNode)} to a {@link NonParamsIndexDetail}.
     */
    private static List<NonParamsIndexDetail> parseDetails(List<String> jsonStrings) {
        List<NonParamsIndexDetail> valid = new ArrayList<>();
        for (String jsonString : jsonStrings) {
            try {
                JsonNode root = MAPPER.readTree(jsonString);
                if (!root.isArray()) {
                    continue;
                }
                for (JsonNode obj : root) {
                    if (obj.isObject() && matches(obj)) {
                        valid.add(MAPPER.treeToValue(obj, NonParamsIndexDetail.class));
                    }
                }
            } catch (IOException | IllegalArgumentException e) {
                log.error("试图转换匹配项出错", e);
            }
        }
        return valid;
    }

    /**
     * Replaces every image reference of {@code detail} that is not already a base64 data URI
     * with a data URI built from the downloaded image. An image that cannot be resolved or
     * downloaded keeps its original value.
     */
    private void inlineImages(OkHttpClient client, String pageUrl, NonParamsIndexDetail detail) {
        List<NonParamsIndexDetailData> datas = detail.getData();
        if (datas == null) {
            return;
        }
        Request.Builder imageBuilder = new Request.Builder()
                .header("Host", "appstatic.emoney.cn")
                .header("Connection", "keep-alive")
                .header("User-Agent", emoneyRequestConfig.getWebviewUserAgent())
                .header("Accept", "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8")
                .header("X-Request-With", "cn.emoney.emstock")
                .header("Sec-Fetch-Site", "same-origin")
                .header("Sec-Fetch-Mode", "no-cors")
                .header("Sec-Fetch-Dest", "image")
                .header("Accept-Encoding", "gzip, deflate")
                .header("Referer", buildUrl(detail.getNameCode()))
                .header("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7");
        for (NonParamsIndexDetailData data : datas) {
            String image = data.getImage();
            if (image == null) {
                continue;
            }
            if (image.startsWith("data:image/") && image.contains(";base64,")) {
                // Already inlined.
                continue;
            }
            // Image paths are resolved against the page that was actually requested.
            String imageUrl = resolveUrl(pageUrl, image);
            if (imageUrl == null) {
                log.warn("Skipping image that cannot be resolved to a URL: {}", image);
                continue;
            }
            Request imageRequest = imageBuilder.url(imageUrl).build();
            try (Response response = client.newCall(imageRequest).execute()) {
                if (!response.isSuccessful()) {
                    log.warn("获取详情图片请求失败, response.isSuccessful() is false, imageUrl: {}", imageUrl);
                    continue;
                }
                byte[] bytes = response.body().bytes();
                // contentType() is null when the response carries no Content-Type header.
                var contentType = response.body().contentType();
                if (contentType == null) {
                    log.warn("Skipping image without Content-Type, imageUrl: {}", imageUrl);
                    continue;
                }
                String base64 = Base64.getEncoder().encodeToString(bytes);
                data.setImage("data:" + contentType + ";base64," + base64);
                log.debug("获取图片成功");
            } catch (IOException e) {
                // Keep going: the remaining images and details are still worth saving.
                log.warn("获取详情图片请求失败", e);
            }
        }
    }

    /** Writes {@code detail} as pretty-printed JSON into {@link #OUTPUT_DIR}, creating the directory if needed. */
    private void saveDetail(NonParamsIndexDetail detail) {
        try {
            Files.createDirectories(OUTPUT_DIR);
            Files.writeString(
                    OUTPUT_DIR.resolve("nonParamsIndexDetail." + detail.getNameCode() + ".json"),
                    MAPPER.valueToTree(detail).toPrettyString());
        } catch (IllegalArgumentException | IOException e) {
            log.error("Failed to write index detail, nameCode: {}", detail.getNameCode(), e);
        }
    }

    /**
     * Tells whether a JSON node has the shape of an index detail.
     *
     * @param obj node to inspect
     * @return {@code true} when {@code name} and {@code nameCode} are textual and {@code data} is an array
     */
    public static boolean matches(JsonNode obj) {
        boolean hasName = obj.has("name") && obj.get("name").isTextual();
        boolean hasNameCode = obj.has("nameCode") && obj.get("nameCode").isTextual();
        boolean hasData = obj.has("data") && obj.get("data").isArray();
        return hasName && hasNameCode && hasData;
    }

    /**
     * Converts a JavaScript array literal to JSON by evaluating {@code JSON.stringify(...)}
     * in a GraalVM JS context.
     *
     * <p>NOTE(review): the literal comes from a downloaded script and is evaluated as code;
     * it is only constrained by the regex that extracted it.
     *
     * @param jsArrayText JavaScript source of the array literal
     * @return the JSON text, or empty when evaluation fails or does not yield a string
     */
    public static Optional<String> stringifyJsArray(String jsArrayText) {
        try (Context context = Context.create("js")) {
            Value result = context.eval("js", "JSON.stringify(" + jsArrayText + ")");
            if (!result.isString()) {
                return Optional.empty();
            }
            return Optional.of(result.asString());
        } catch (Exception e) {
            log.warn("Failed to call JSON.stringify on {}", jsArrayText, e);
            return Optional.empty();
        }
    }

    /**
     * Resolves a possibly relative reference against the URL of the page that contains it.
     *
     * @param pageUrl   URL of the containing page
     * @param scriptSrc reference found in the page (script or image source)
     * @return the absolute URL, or {@code null} when it cannot be resolved
     */
    public static String resolveUrl(String pageUrl, String scriptSrc) {
        try {
            return URI.create(pageUrl).resolve(scriptSrc).toString();
        } catch (Exception e) {
            // The throwable is passed last without a placeholder so the stack trace is logged.
            log.warn("转换 URI 错误pageUrl: {}, scriptSrc: {}", pageUrl, scriptSrc, e);
            return null;
        }
    }
}

View File

@@ -1,12 +1,11 @@
package quant.rich;
import java.lang.reflect.Field;
import java.lang.reflect.Type;
import java.util.Map;
import java.util.function.Consumer;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import okhttp3.Request;
import quant.rich.emoney.patch.okhttp.RequestContext;
import quant.rich.emoney.patch.okhttp.PatchOkHttp;
import quant.rich.emoney.patch.okhttp.PatchOkHttpRule;
@@ -20,7 +19,11 @@ public class PatchOkHttpTest {
RequestContext context;
rule = PatchOkHttpRule.when().isHttps().build();
context = new RequestContext(Map.of(), "https", "localhost");
context = new RequestContext(
new Request.Builder()
.url("https://localhost"));
Assertions.assertTrue(rule.matches(context), "测试失败");
@@ -30,11 +33,14 @@ public class PatchOkHttpTest {
.or(b -> b.hasHeaderName("X-Protocol-Id"))
.overrideIf("User-Agent", "okhttp/3.12.2")
.build();
context = new RequestContext(Map.of(), "https", "mbs.emoney.cn");
context = new RequestContext(new Request.Builder()
.url("https://mbs.emoney.cn"));
Assertions.assertTrue(rule.matches(context), "测试失败");
context = new RequestContext(Map.of(), "https", "emapp-static.oss-cn-shanghai.aliyuncs.com");
context = new RequestContext(new Request.Builder()
.url("https://emapp-static.oss-cn-shanghai.aliyuncs.com"));
Assertions.assertTrue(rule.matches(context), "测试失败");
// 测试 Override
@@ -42,13 +48,17 @@ public class PatchOkHttpTest {
Consumer<String> consumer = str -> {
modifier[0] = str;
};
PatchOkHttp.apply(rule);
PatchOkHttp.match(context, "User-Agent", consumer);
Assertions.assertTrue("okhttp/3.12.2".equals(modifier[0]), "测试失败User-Agent 覆写失败");
modifier[0] = "";
context = new RequestContext(Map.of(), "https", "hao123.com");
context = new RequestContext(new Request.Builder()
.url("https://hao123.com"));
Assertions.assertFalse(rule.matches(context), "测试失败");
PatchOkHttp.match(context, "User-Agent", consumer);
Assertions.assertTrue("".equals(modifier[0]), "测试失败User-Agent 不覆写失败");

View File

@@ -0,0 +1,46 @@
package quant.rich;
import lombok.extern.slf4j.Slf4j;
import quant.rich.emoney.EmoneyAutoApplication;
import quant.rich.emoney.client.EmoneyClient;
import quant.rich.emoney.client.WebviewClient;
import quant.rich.emoney.client.WebviewClient.WebviewResponseWrapper;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import org.junit.jupiter.api.Test;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
/**
 * Reads a locally stored script ({@code ./conf/extra/indexJs.js}), extracts the first array
 * literal that looks like a list of index details and logs its JSON form.
 */
@SpringBootTest
@ContextConfiguration(classes = EmoneyAutoApplication.class)
@RunWith(SpringJUnit4ClassRunner.class)
@Slf4j
public class RelativeEmoneyScraper {

    /**
     * Matches a JavaScript array literal of objects made of {@code id}, {@code data}
     * (with {@code title}/{@code items}/{@code image}), {@code name} and {@code nameCode} members.
     */
    static final Pattern nonParamsIndexDetailPattern = Pattern.compile("\\[(\\{(id:\\d+,?|data:\\[\\{(title:\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*?\",?|items:\\[(\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*?\",?)+],?|image:\".*?\",?)+\\}+\\],?|name:\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*?\",?|nameCode:\"\\d+\",?)+\\},?)+\\]", Pattern.DOTALL);

    /**
     * Extracts the first matching array literal from the script file and logs the result of
     * {@code JSON.stringify} on it.
     *
     * @throws Exception when the file cannot be read, no JavaScript engine is available,
     *                   or the literal cannot be evaluated
     */
    @Test
    void test() throws Exception {
        String js = Files.readString(Path.of("./conf/extra/indexJs.js"));
        Matcher m = nonParamsIndexDetailPattern.matcher(js);
        if (!m.find()) {
            log.warn("No index-detail array literal found in ./conf/extra/indexJs.js");
            return;
        }
        String jsArrayText = m.group();

        // Ask for a JavaScript engine by name instead of taking whichever factory is listed
        // first: the first factory may be another language, and the list may be empty.
        ScriptEngine jsEngine = new ScriptEngineManager().getEngineByName("JavaScript");
        if (jsEngine == null) {
            throw new IllegalStateException("No JSR-223 JavaScript engine is available on the classpath");
        }

        // NOTE(review): the matched text is evaluated as code; it is only constrained by the regex.
        Object result = jsEngine.eval("JSON.stringify(" + jsArrayText + ")");
        if (result instanceof String resultString) {
            log.info(resultString);
        }
    }
}

View File

@@ -0,0 +1,109 @@
package quant.rich;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.graalvm.polyglot.*;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
/**
 * Stand-alone check that extracts index-detail arrays from a locally stored script
 * ({@code ./conf/extra/indexJs.js}) and prints the objects they contain.
 */
public class TestIndexJsMatch {

    private static final ObjectMapper MAPPER = new ObjectMapper();

    // How the expression is built:
    // 1. The target is an array, so the outermost part is \[...\]
    // 2. Its content is a number of objects, so it is (\{...\},?)+
    // 3. Each object may contain the following members, in any order:
    //    3.1. id, a number:                     id:\d+,?
    //    3.2. data, an array of objects with:
    //         3.2.1. title, a string:           title:".*?",?
    //         3.2.2. items, an array of string: items:\[(".*?",?)+],?
    //         3.2.3. image, an optional string: image:".*?",?
    //         3.2.4. combined: data:\[\{(title:".*?",?|items:\[(".*?",?)+],?|image:".*?",?)+\}+\],?
    //    3.3. name, a string:                   name:".*?",?
    //    3.4. nameCode, a numeric string:       nameCode:"\d+",?
    // 4. Every ".*?" except the one of image is replaced by "[^"\\]*(?:\\.[^"\\]*)*?" so that
    //    an escaped double quote inside a string does not cut the match short.
    // Compiled once: the pattern is immutable and does not depend on the input.
    private static final Pattern INDEX_DETAIL_PATTERN = Pattern.compile("\\[(\\{(id:\\d+,?|data:\\[\\{(title:\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*?\",?|items:\\[(\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*?\",?)+],?|image:\".*?\",?)+\\}+\\],?|name:\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*?\",?|nameCode:\"\\d+\",?)+\\},?)+\\]", Pattern.DOTALL);

    /**
     * Reads the script file and prints every object of every valid array found in it.
     *
     * @param args unused
     * @throws IOException when the script file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String script = Files.readString(Path.of("./conf/extra/indexJs.js"), StandardCharsets.UTF_8);
        // Normally only one array is expected to match, but a list is used to be safe.
        List<ArrayNode> arrays = extractValidArrays(script);
        for (ArrayNode array : arrays) {
            System.out.println("✔ 匹配数组,共 " + array.size() + " 个对象");
            for (JsonNode obj : array) {
                System.out.println(obj.toPrettyString());
            }
        }
    }

    /**
     * Extracts every array literal in {@code text} that has the index-detail shape.
     *
     * @param text JavaScript source to search
     * @return the matched literals in order of appearance; empty when nothing matches
     */
    public static List<String> extractAllArrays(String text) {
        List<String> result = new ArrayList<>();
        Matcher m = INDEX_DETAIL_PATTERN.matcher(text);
        while (m.find()) {
            result.add(m.group());
        }
        return result;
    }

    /**
     * Converts a JavaScript array literal to JSON by evaluating {@code JSON.stringify(...)}
     * in a GraalVM JS context.
     *
     * @param jsArrayText JavaScript source of the array literal
     * @return the JSON text, or empty when evaluation fails or does not yield a string
     */
    public static Optional<String> stringifyJsArray(String jsArrayText) {
        try (Context context = Context.create("js")) {
            Value result = context.eval("js", "JSON.stringify(" + jsArrayText + ")");
            if (!result.isString()) {
                return Optional.empty();
            }
            return Optional.of(result.asString());
        } catch (Exception e) {
            // Still best-effort, but say why the candidate was dropped.
            System.err.println("JSON.stringify failed for a matched literal: " + e);
            return Optional.empty();
        }
    }

    /**
     * Tells whether an object has the index-detail structure.
     *
     * @param obj node to inspect
     * @return {@code true} when {@code name}, {@code nameCode} and {@code data} exist and
     *         {@code data} is an array
     */
    public static boolean matches(JsonNode obj) {
        if (!obj.has("name") || !obj.has("nameCode") || !obj.has("data")) {
            return false;
        }
        return obj.get("data").isArray();
    }

    /**
     * Main processing: match literals, convert them to JSON and keep each array that
     * contains at least one object accepted by {@link #matches(JsonNode)}.
     *
     * @param jsSource JavaScript source to search
     * @return the accepted arrays; empty when none qualifies
     */
    public static List<ArrayNode> extractValidArrays(String jsSource) {
        List<ArrayNode> valid = new ArrayList<>();
        for (String raw : extractAllArrays(jsSource)) {
            Optional<String> json = stringifyJsArray(raw);
            if (json.isEmpty()) {
                continue;
            }
            try {
                JsonNode root = MAPPER.readTree(json.get());
                if (root.isArray() && containsIndexDetail(root)) {
                    valid.add((ArrayNode) root);
                }
            } catch (IOException e) {
                System.err.println("Skipping a candidate that is not valid JSON: " + e);
            }
        }
        return valid;
    }

    /** Returns whether at least one element of {@code array} is an object with the index-detail structure. */
    private static boolean containsIndexDetail(JsonNode array) {
        for (JsonNode obj : array) {
            if (obj.isObject() && matches(obj)) {
                return true;
            }
        }
        return false;
    }
}