package quant.rich; import java.io.IOException; import java.io.Serializable; import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.Base64; import java.util.List; import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.graalvm.polyglot.Context; import org.graalvm.polyglot.Value; import org.junit.jupiter.api.Test; import org.junit.runner.RunWith; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; import jodd.jerry.Jerry; import lombok.extern.slf4j.Slf4j; import okhttp3.OkHttpClient; import okhttp3.Request; import okhttp3.Response; import quant.rich.emoney.EmoneyAutoApplication; import quant.rich.emoney.client.EmoneyClient; import quant.rich.emoney.client.OkHttpClientProvider; import quant.rich.emoney.pojo.dto.NonParamsIndexDetail; import quant.rich.emoney.pojo.dto.NonParamsIndexDetail.NonParamsIndexDetailData; import quant.rich.emoney.service.sqlite.RequestInfoService; @SpringBootTest @ContextConfiguration(classes = EmoneyAutoApplication.class) @RunWith(SpringJUnit4ClassRunner.class) @Slf4j public class EmoneyIndexScraper { private static final ObjectMapper MAPPER = new ObjectMapper(); @Autowired RequestInfoService requestInfoService; static { MAPPER.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); } public String buildUrl(Serializable indexCode) { StringBuilder urlBuilder = new StringBuilder(); urlBuilder.append("https://appstatic.emoney.cn/html/emapp/stock/note/?name="); urlBuilder.append(indexCode.toString()); urlBuilder.append("&emoneyScaleType=0&emoneyLandMode=0&token="); urlBuilder.append(requestInfoService.getDefaultRequestInfo().getAuthorization()); return urlBuilder.toString(); } @Test void test() { EmoneyClient.relogin(); String url = buildUrl(10002800); // 1.1 按照顺序放置 Header Request request = new Request.Builder() .url(url) .header("Host", "appstatic.emoney.cn") .header("Connection", "keep-alive") .header("Upgrade-Insecure-Requests", "1") .header("User-Agent", requestInfoService.getDefaultRequestInfo().getWebviewUserAgent()) .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9") .header("X-Request-With", "cn.emoney.emstock") .header("Sec-Fetch-Site", "none") .header("Sec-Fetch-Mode", "navigate") .header("Sec-Fetch-User", "?1") .header("Sec-Fetch-Dest", "document") .header("Accept-Encoding", "gzip, deflate") .header("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7") .build(); // 1.2 请求 OkHttpClient client = OkHttpClientProvider.getInstance(); List scripts = new ArrayList<>(); try (Response response = client.newCall(request).execute()) { if (!response.isSuccessful()) { log.error("1.2 请求指标说明基本页面失败:response.isSuccessful() is false"); return; } String responseBody = response.body().string(); Jerry jerryDoc = Jerry.of(responseBody); jerryDoc.s("script[src]").forEach(el -> { String absoluteURI = resolveUrl(url, el.attr("src")); log.info("script uri: {}", absoluteURI); if (absoluteURI != null) { scripts.add(absoluteURI); } }); } catch (IOException e) { // TODO Auto-generated catch block log.error("1.2 请求指标说明基本页面失败,IOException", e); e.printStackTrace(); return; } if (scripts.size() == 0) { log.error("未能获取基本页面内脚本链接"); return; } // 2. 加载 scripts,试图匹配包含在 js 中的指标说明 Pattern p = Pattern.compile("\\[(\\{(id:\\d+,?|data:\\[\\{(title:\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*?\",?|items:\\[(\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*?\",?)+],?|image:\".*?\",?)+\\}+\\],?|name:\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*?\",?|nameCode:\"\\d+\",?)+\\},?)+\\]", Pattern.DOTALL); List matchGroups = new ArrayList<>(); Request.Builder scriptBuilder = new Request.Builder() .header("Host", "appstatic.emoney.cn") .header("Connection", "keep-alive") .header("User-Agent", requestInfoService.getDefaultRequestInfo().getWebviewUserAgent()) .header("Accept", "*/*") .header("X-Request-With", "cn.emoney.emstock") .header("Sec-Fetch-Site", "same-origin") .header("Sec-Fetch-Mode", "no-cors") .header("Sec-Fetch-Dest", "script") .header("Accept-Encoding", "gzip, deflate") .header("Referer", url) .header("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7"); for (String scriptUrl : scripts) { Request scriptRequest = scriptBuilder.url(scriptUrl).build(); try (Response response = client.newCall(scriptRequest).execute()) { if (!response.isSuccessful()) { log.warn("2. 读取页面内 script 请求失败, response.isSuccessful() is false, scriptUrl: {}", scriptUrl); return; } String responseBody = response.body().string(); // 尝试匹配 Matcher m = p.matcher(responseBody); while (m.find()) { String find = m.group(); Optional jsonStringify = stringifyJsArray(find); jsonStringify.ifPresent(jsonString -> matchGroups.add(jsonString)); } } catch (IOException e) { // TODO Auto-generated catch block log.warn("1.2 请求失败", e); e.printStackTrace(); return; } } // 将每个 jsonString 转换为 jsonArray,进一步转换成 IndexDetail List valid = new ArrayList<>(); for (String jsonString : matchGroups) { try { JsonNode root = MAPPER.readTree(jsonString); if (root.isArray()) { ArrayNode array = (ArrayNode) root; for (JsonNode obj : array) { if (obj.isObject() && matches(obj)) { NonParamsIndexDetail detail = MAPPER.treeToValue(obj, NonParamsIndexDetail.class); valid.add(detail); } } } } catch (Exception ignored) { log.error("试图转换匹配项出错", ignored); } } // 直接保存在本地 // 遍历 valid 内合法的 object, 获取 image Pattern numericPattern = Pattern.compile("^\\d+$"); for (NonParamsIndexDetail detail : valid) { // 判断 nameCode 是否是合法的代码 if (!numericPattern.matcher(detail.getNameCode()).matches()) { continue; } Request.Builder imageBuilder = new Request.Builder() .header("Host", "appstatic.emoney.cn") .header("Connection", "keep-alive") .header("User-Agent", requestInfoService.getDefaultRequestInfo().getWebviewUserAgent()) .header("Accept", "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8") .header("X-Request-With", "cn.emoney.emstock") .header("Sec-Fetch-Site", "same-origin") .header("Sec-Fetch-Mode", "no-cors") .header("Sec-Fetch-Dest", "image") .header("Accept-Encoding", "gzip, deflate") .header("Referer", buildUrl(detail.getNameCode())) .header("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7"); List datas = detail.getData(); for (NonParamsIndexDetailData data : datas) { if (data.getImage() != null) { String image = data.getImage(); if (image.startsWith("data:image/") && image.contains(";base64,")) { continue; } else { image = resolveUrl(url, image); Request imageRequest = imageBuilder.url(image).build(); try (Response response = client.newCall(imageRequest).execute()) { if (!response.isSuccessful()) { log.warn("获取详情图片请求失败, response.isSuccessful() is false, imageUrl: {}", image); continue; } byte[] bytes = response.body().bytes(); String contentType = response.body().contentType().toString(); String base64 = Base64.getEncoder().encodeToString(bytes); data.setImage("data:" + contentType + ";base64," + base64); log.debug("获取图片成功"); } catch (IOException e) { // TODO Auto-generated catch block log.warn("获取详情图片请求失败", e); e.printStackTrace(); return; } } } } try { Files.writeString( Path.of("./resources/conf/extra/nonParamsIndexDetail." + detail.getNameCode() + ".json"), MAPPER.valueToTree(detail).toPrettyString()); } catch (IllegalArgumentException | IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } return; } /** * 判断某 jsonNode 是否为合法指标信息 * @param obj * @return */ public static boolean matches(JsonNode obj) { return obj.has("name") && obj.get("name").isTextual() && obj.has("nameCode") && obj.get("nameCode").isTextual() && obj.has("data") && obj.get("data").isArray() ; } /** * 使用 GraalVM JS 尝试 JSON.stringify * @param jsArrayText * @return */ public static Optional stringifyJsArray(String jsArrayText) { try (Context context = Context.create("js")) { Value result = context.eval("js", "JSON.stringify(" + jsArrayText + ")"); return result.isString() ? Optional.of(result.asString()) : Optional.empty(); } catch (Exception e) { log.warn("Failed to call JSON.stringify on {}", jsArrayText, e); return Optional.empty(); } } public static String resolveUrl(String pageUrl, String scriptSrc) { try { URI base = URI.create(pageUrl); return base.resolve(scriptSrc).toString(); } catch (Exception e) { log.info("转换 URI 错误,pageUrl: {}, scriptSrc: {} {}", pageUrl, scriptSrc, e); return null; } } }