feat: 完善JavaScript解析器功能

- 优化JsScriptLoader,支持JAR包内和文件系统的自动资源文件发现
- 移除预定义文件列表,完全依赖自动检测
- 添加getNoRedirect方法支持重定向处理
- 添加sendMultipartForm方法支持文件上传
- 添加代理配置支持
- 修复JSON解析的压缩处理问题
- 添加默认请求头支持(Accept-Encoding、User-Agent、Accept-Language)
- 更新文档,修正导出方式说明
- 优化README.md结构,删除不符合模块定位的内容
- 升级parser版本到10.2.1
This commit is contained in:
q
2025-10-22 17:33:50 +08:00
parent 7b364a0f90
commit 064efdf3f3
25 changed files with 644 additions and 271 deletions

View File

@@ -1,10 +1,11 @@
package cn.qaiu;
import cn.qaiu.parser.CustomParserRegistry;
import io.vertx.core.Vertx;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import cn.qaiu.parser.custom.CustomParserRegistry;
public class WebClientVertxInit {
private Vertx vertx = null;
private static final WebClientVertxInit INSTANCE = new WebClientVertxInit();
@@ -36,4 +37,4 @@ public class WebClientVertxInit {
}
return INSTANCE.vertx;
}
}
}

View File

@@ -1,6 +1,10 @@
package cn.qaiu.parser;
import cn.qaiu.entity.ShareLinkInfo;
import cn.qaiu.parser.custom.CustomParserConfig;
import cn.qaiu.parser.custom.CustomParserRegistry;
import cn.qaiu.parser.customjs.JsParserExecutor;
import org.apache.commons.lang3.StringUtils;
import java.net.URLEncoder;

View File

@@ -1,6 +1,7 @@
package cn.qaiu.parser;
package cn.qaiu.parser.custom;
import cn.qaiu.entity.ShareLinkInfo;
import cn.qaiu.parser.IPanTool;
import java.util.Map;
import java.util.regex.Pattern;

View File

@@ -1,8 +1,12 @@
package cn.qaiu.parser;
package cn.qaiu.parser.custom;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import cn.qaiu.parser.PanDomainTemplate;
import cn.qaiu.parser.customjs.JsScriptLoader;
import cn.qaiu.parser.customjs.JsScriptMetadataParser;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

View File

@@ -1,4 +1,4 @@
package cn.qaiu.parser;
package cn.qaiu.parser.customjs;
import cn.qaiu.WebClientVertxInit;
import cn.qaiu.util.HttpResponseHelper;
@@ -6,11 +6,17 @@ import io.vertx.core.Future;
import io.vertx.core.MultiMap;
import io.vertx.core.Promise;
import io.vertx.core.buffer.Buffer;
import io.vertx.core.json.JsonArray;
import io.vertx.core.json.JsonObject;
import io.vertx.core.net.ProxyOptions;
import io.vertx.core.net.ProxyType;
import io.vertx.ext.web.client.HttpRequest;
import io.vertx.ext.web.client.HttpResponse;
import io.vertx.ext.web.client.WebClient;
import io.vertx.ext.web.client.WebClientOptions;
import io.vertx.ext.web.client.WebClientSession;
import io.vertx.ext.web.multipart.MultipartForm;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -36,6 +42,48 @@ public class JsHttpClient {
this.client = WebClient.create(WebClientVertxInit.get());
this.clientSession = WebClientSession.create(client);
this.headers = MultiMap.caseInsensitiveMultiMap();
// 设置默认的Accept-Encoding头以支持压缩响应
this.headers.set("Accept-Encoding", "gzip, deflate, br, zstd");
// 设置默认的User-Agent头
this.headers.set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0");
// 设置默认的Accept-Language头
this.headers.set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6");
}
/**
* 带代理配置的构造函数
* @param proxyConfig 代理配置JsonObject包含typehostportusernamepassword
*/
public JsHttpClient(JsonObject proxyConfig) {
if (proxyConfig != null && proxyConfig.containsKey("type")) {
ProxyOptions proxyOptions = new ProxyOptions()
.setType(ProxyType.valueOf(proxyConfig.getString("type").toUpperCase()))
.setHost(proxyConfig.getString("host"))
.setPort(proxyConfig.getInteger("port"));
if (StringUtils.isNotEmpty(proxyConfig.getString("username"))) {
proxyOptions.setUsername(proxyConfig.getString("username"));
}
if (StringUtils.isNotEmpty(proxyConfig.getString("password"))) {
proxyOptions.setPassword(proxyConfig.getString("password"));
}
this.client = WebClient.create(WebClientVertxInit.get(),
new WebClientOptions()
.setUserAgentEnabled(false)
.setProxyOptions(proxyOptions));
this.clientSession = WebClientSession.create(client);
} else {
this.client = WebClient.create(WebClientVertxInit.get());
this.clientSession = WebClientSession.create(client);
}
this.headers = MultiMap.caseInsensitiveMultiMap();
// 设置默认的Accept-Encoding头以支持压缩响应
this.headers.set("Accept-Encoding", "gzip, deflate, br, zstd");
// 设置默认的User-Agent头
this.headers.set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0");
// 设置默认的Accept-Language头
this.headers.set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6");
}
/**
@@ -132,7 +180,7 @@ public class JsHttpClient {
}
/**
* 发送表单数据
* 发送表单数据简单键值对
* @param data 表单数据
* @return HTTP响应
*/
@@ -152,6 +200,45 @@ public class JsHttpClient {
});
}
/**
 * POSTs a multipart form to the given absolute URL; binary values are sent as
 * file uploads.
 *
 * <p>Each map entry becomes one form part:
 * <ul>
 *   <li>{@code byte[]} or {@link Buffer} values are sent as a binary upload whose
 *       field name and file name are both the map key, with content type
 *       {@code application/octet-stream};</li>
 *   <li>{@code null} values are skipped;</li>
 *   <li>every other value is sent as a text attribute using its
 *       {@code toString()} form.</li>
 * </ul>
 *
 * @param url  absolute request URL
 * @param data form fields; may be {@code null}, in which case an empty form is sent
 * @return the HTTP response
 */
public JsHttpResponse sendMultipartForm(String url, Map<String, Object> data) {
    return executeRequest(() -> {
        HttpRequest<Buffer> post = client.postAbs(url);
        if (!headers.isEmpty()) {
            post.putHeaders(headers);
        }

        MultipartForm multipart = MultipartForm.create();
        if (data == null) {
            return post.sendMultipartForm(multipart);
        }

        for (Map.Entry<String, Object> field : data.entrySet()) {
            String name = field.getKey();
            Object raw = field.getValue();
            if (raw == null) {
                continue;
            }

            // Resolve binary payloads first; anything else is sent as text.
            Buffer payload = null;
            if (raw instanceof byte[]) {
                payload = Buffer.buffer((byte[]) raw);
            } else if (raw instanceof Buffer) {
                payload = (Buffer) raw;
            }

            if (payload != null) {
                multipart.binaryFileUpload(name, name, payload, "application/octet-stream");
            } else {
                // Covers String values as well as any other type.
                multipart.attribute(name, raw.toString());
            }
        }
        return post.sendMultipartForm(multipart);
    });
}
/**
* 发送JSON数据
* @param data JSON数据
@@ -224,26 +311,13 @@ public class JsHttpClient {
*/
public Object json() {
try {
String body = response.bodyAsString();
if (body == null || body.trim().isEmpty()) {
JsonObject jsonObject = HttpResponseHelper.asJson(response);
if (jsonObject == null || jsonObject.isEmpty()) {
return null;
}
// 尝试解析为JSON对象
try {
JsonObject jsonObject = response.bodyAsJsonObject();
// 将JsonObject转换为Map这样JavaScript可以正确访问
return jsonObject.getMap();
} catch (Exception e) {
// 如果解析为对象失败尝试解析为数组
try {
return response.bodyAsJsonArray().getList();
} catch (Exception e2) {
// 如果都失败了返回原始字符串
log.warn("无法解析为JSON返回原始字符串: {}", body);
return body;
}
}
// 将JsonObject转换为Map这样JavaScript可以正确访问
return jsonObject.getMap();
} catch (Exception e) {
log.error("解析JSON响应失败", e);
throw new RuntimeException("解析JSON响应失败: " + e.getMessage(), e);

View File

@@ -1,4 +1,4 @@
package cn.qaiu.parser;
package cn.qaiu.parser.customjs;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@@ -1,9 +1,12 @@
package cn.qaiu.parser;
package cn.qaiu.parser.customjs;
import cn.qaiu.entity.FileInfo;
import cn.qaiu.entity.ShareLinkInfo;
import cn.qaiu.parser.IPanTool;
import cn.qaiu.parser.custom.CustomParserConfig;
import io.vertx.core.Future;
import io.vertx.core.Promise;
import io.vertx.core.json.JsonObject;
import org.openjdk.nashorn.api.scripting.ScriptObjectMirror;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -39,7 +42,14 @@ public class JsParserExecutor implements IPanTool {
this.config = config;
this.shareLinkInfo = shareLinkInfo;
this.engine = initEngine();
this.httpClient = new JsHttpClient();
// 检查是否有代理配置
JsonObject proxyConfig = null;
if (shareLinkInfo.getOtherParam().containsKey("proxy")) {
proxyConfig = (JsonObject) shareLinkInfo.getOtherParam().get("proxy");
}
this.httpClient = new JsHttpClient(proxyConfig);
this.jsLogger = new JsLogger("JsParser-" + config.getType());
this.shareLinkInfoWrapper = new JsShareLinkInfoWrapper(shareLinkInfo);
}

View File

@@ -1,8 +1,10 @@
package cn.qaiu.parser;
package cn.qaiu.parser.customjs;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import cn.qaiu.parser.custom.CustomParserConfig;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
@@ -66,33 +68,28 @@ public class JsScriptLoader {
List<CustomParserConfig> configs = new ArrayList<>();
try {
// 获取资源目录的输入流
InputStream resourceStream = JsScriptLoader.class.getClassLoader()
.getResourceAsStream(RESOURCE_PATH);
// 尝试使用反射方式获取JAR包内的资源文件列表
List<String> resourceFiles = getResourceFileList();
if (resourceStream == null) {
log.debug("资源目录 {} 不存在", RESOURCE_PATH);
return configs;
}
// 按文件名排序确保加载顺序一致
resourceFiles.sort(String::compareTo);
// 读取资源目录下的所有文件
String resourcePath = JsScriptLoader.class.getClassLoader()
.getResource(RESOURCE_PATH).getPath();
try (Stream<Path> paths = Files.walk(Paths.get(resourcePath))) {
paths.filter(Files::isRegularFile)
.filter(path -> path.toString().endsWith(".js"))
.filter(path -> !isExcludedFile(path.getFileName().toString()))
.forEach(path -> {
try {
String jsCode = Files.readString(path, StandardCharsets.UTF_8);
CustomParserConfig config = JsScriptMetadataParser.parseScript(jsCode);
configs.add(config);
log.debug("从资源目录加载脚本: {}", path.getFileName());
} catch (Exception e) {
log.warn("加载资源脚本失败: {}", path.getFileName(), e);
}
});
for (String resourceFile : resourceFiles) {
try {
InputStream inputStream = JsScriptLoader.class.getClassLoader()
.getResourceAsStream(resourceFile);
if (inputStream != null) {
String jsCode = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8);
CustomParserConfig config = JsScriptMetadataParser.parseScript(jsCode);
configs.add(config);
String fileName = resourceFile.substring(resourceFile.lastIndexOf('/') + 1);
log.debug("从资源目录加载脚本: {}", fileName);
}
} catch (Exception e) {
log.warn("加载资源脚本失败: {}", resourceFile, e);
}
}
} catch (Exception e) {
@@ -102,6 +99,92 @@ public class JsScriptLoader {
return configs;
}
/**
 * Discovers the script resources under {@code RESOURCE_PATH} on the classpath.
 *
 * <p>The resource directory URL is resolved through the class loader and handled
 * according to its protocol: {@code jar} URLs are listed from the JAR entries,
 * {@code file} URLs from the directory on disk. Any other protocol, a missing
 * directory, or a failure during discovery yields an empty list.
 *
 * @return classpath-relative resource names of the discovered scripts; never {@code null}
 */
private static List<String> getResourceFileList() {
    List<String> resourceFiles = new ArrayList<>();

    try {
        java.net.URL resourceUrl = JsScriptLoader.class.getClassLoader()
                .getResource(RESOURCE_PATH);
        if (resourceUrl == null) {
            return resourceFiles;
        }

        String protocol = resourceUrl.getProtocol();
        if ("jar".equals(protocol)) {
            // Packaged: resources live inside a JAR.
            resourceFiles = getJarResourceFiles(resourceUrl);
        } else if ("file".equals(protocol)) {
            // Development: resources live on the file system.
            resourceFiles = getFileSystemResourceFiles(resourceUrl);
        }
    } catch (Exception e) {
        // There is no predefined fallback list; the caller simply gets an empty result.
        log.debug("获取资源文件列表失败,将返回空列表", e);
    }

    return resourceFiles;
}
/**
 * Lists the {@code .js} entries below {@code RESOURCE_PATH} inside the JAR that
 * {@code jarUrl} points into, skipping names rejected by {@code isExcludedFile}.
 *
 * <p>Failures (for example a URL without a {@code !} separator or an unreadable
 * JAR) are logged at debug level and produce an empty or partial list.
 *
 * @param jarUrl a {@code jar:file:...!/...} URL of the resource directory
 * @return matching JAR entry names; never {@code null}
 */
private static List<String> getJarResourceFiles(java.net.URL jarUrl) {
    List<String> resourceFiles = new ArrayList<>();

    try {
        // URL path looks like "file:/path/to/app.jar!/dir": strip the "file:" prefix
        // and everything from the '!' separator onwards.
        String urlPath = jarUrl.getPath();
        String encodedJarPath = urlPath.substring(5, urlPath.indexOf('!'));
        // URL paths are percent-encoded (e.g. spaces as %20); decode before touching
        // the file system. '+' is protected because URLDecoder would turn it into a space.
        String jarPath = java.net.URLDecoder.decode(
                encodedJarPath.replace("+", "%2B"),
                java.nio.charset.StandardCharsets.UTF_8);

        String prefix = RESOURCE_PATH + "/";
        // try-with-resources guarantees the JAR handle is released even if iteration fails.
        try (java.util.jar.JarFile jarFile = new java.util.jar.JarFile(jarPath)) {
            java.util.Enumeration<java.util.jar.JarEntry> entries = jarFile.entries();
            while (entries.hasMoreElements()) {
                String entryName = entries.nextElement().getName();
                if (!entryName.startsWith(prefix) || !entryName.endsWith(".js")) {
                    continue;
                }
                String simpleName = entryName.substring(entryName.lastIndexOf('/') + 1);
                if (!isExcludedFile(simpleName)) {
                    resourceFiles.add(entryName);
                }
            }
        }
    } catch (Exception e) {
        log.debug("解析JAR包资源文件失败", e);
    }

    return resourceFiles;
}
/**
 * Lists the {@code .js} files directly inside the resource directory on disk,
 * skipping names rejected by {@code isExcludedFile}. Subdirectories are not searched.
 *
 * <p>Failures are logged at debug level and produce an empty or partial list.
 *
 * @param fileUrl a {@code file:} URL of the resource directory
 * @return resource names of the form {@code RESOURCE_PATH + "/" + fileName}; never {@code null}
 */
private static List<String> getFileSystemResourceFiles(java.net.URL fileUrl) {
    List<String> resourceFiles = new ArrayList<>();

    try {
        // URL paths are percent-encoded (e.g. spaces as %20); decode before building
        // the File. '+' is protected because URLDecoder would turn it into a space.
        String directoryPath = java.net.URLDecoder.decode(
                fileUrl.getPath().replace("+", "%2B"),
                java.nio.charset.StandardCharsets.UTF_8);
        java.io.File resourceDir = new java.io.File(directoryPath);

        // listFiles() returns null when the path is not a directory or cannot be read.
        java.io.File[] files = resourceDir.isDirectory() ? resourceDir.listFiles() : null;
        if (files != null) {
            for (java.io.File file : files) {
                String fileName = file.getName();
                if (file.isFile() && fileName.endsWith(".js") && !isExcludedFile(fileName)) {
                    resourceFiles.add(RESOURCE_PATH + "/" + fileName);
                }
            }
        }
    } catch (Exception e) {
        log.debug("解析文件系统资源文件失败", e);
    }

    return resourceFiles;
}
/**
* 从外部目录加载JavaScript脚本
*/

View File

@@ -1,9 +1,11 @@
package cn.qaiu.parser;
package cn.qaiu.parser.customjs;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import cn.qaiu.parser.custom.CustomParserConfig;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;

View File

@@ -1,4 +1,4 @@
package cn.qaiu.parser;
package cn.qaiu.parser.customjs;
import cn.qaiu.entity.ShareLinkInfo;
import org.slf4j.Logger;

View File

@@ -59,7 +59,7 @@ public class HttpResponseHelper {
case "gzip" -> decompressGzip(compressed);
case "deflate" -> decompressDeflate(compressed);
case "br" -> decompressBrotli(compressed);
//case "zstd" -> decompressZstd(compressed);
case "zstd" -> compressed.toString(StandardCharsets.UTF_8); // 暂时返回原始内容
default -> throw new UnsupportedOperationException("不支持的 Content-Encoding: " + encoding);
};
}

View File

@@ -29,9 +29,12 @@
/**
* @typedef {Object} JsHttpClient
* @property {function(string): JsHttpResponse} get - 发起GET请求
* @property {function(string): JsHttpResponse} getWithRedirect - 发起GET请求并跟随重定向
* @property {function(string): JsHttpResponse} getNoRedirect - 发起GET请求但不跟随重定向(用于获取Location头)
* @property {function(string, any=): JsHttpResponse} post - 发起POST请求
* @property {function(string, string): JsHttpClient} putHeader - 设置请求头
* @property {function(Object): JsHttpResponse} sendForm - 发送表单数据
* @property {function(Object): JsHttpResponse} sendForm - 发送简单表单数据
* @property {function(string, Object): JsHttpResponse} sendMultipartForm - 发送multipart表单数据,支持文件上传
* @property {function(any): JsHttpResponse} sendJson - 发送JSON数据
*/

View File

@@ -2,8 +2,12 @@ package cn.qaiu.parser;
import cn.qaiu.entity.FileInfo;
import cn.qaiu.entity.ShareLinkInfo;
import cn.qaiu.parser.custom.CustomParserConfig;
import cn.qaiu.parser.custom.CustomParserRegistry;
import cn.qaiu.parser.customjs.JsParserExecutor;
import cn.qaiu.WebClientVertxInit;
import io.vertx.core.Vertx;
import io.vertx.core.json.JsonObject;
import org.junit.Test;
import java.util.HashMap;

View File

@@ -1,6 +1,8 @@
package cn.qaiu.parser;
import cn.qaiu.entity.ShareLinkInfo;
import cn.qaiu.parser.custom.CustomParserConfig;
import cn.qaiu.parser.custom.CustomParserRegistry;
import io.vertx.core.Future;
import io.vertx.core.Promise;
import org.junit.After;

View File

@@ -1,6 +1,8 @@
package cn.qaiu.parser;
import cn.qaiu.entity.ShareLinkInfo;
import cn.qaiu.parser.custom.CustomParserConfig;
import cn.qaiu.parser.custom.CustomParserRegistry;
import io.vertx.core.Future;
import io.vertx.core.Promise;

View File

@@ -2,6 +2,9 @@ package cn.qaiu.parser;
import cn.qaiu.entity.FileInfo;
import cn.qaiu.entity.ShareLinkInfo;
import cn.qaiu.parser.custom.CustomParserConfig;
import cn.qaiu.parser.custom.CustomParserRegistry;
import cn.qaiu.parser.customjs.JsParserExecutor;
import cn.qaiu.WebClientVertxInit;
import io.vertx.core.Vertx;
import org.junit.Test;

View File

@@ -2,6 +2,9 @@ package cn.qaiu.parser;
import org.junit.Test;
import cn.qaiu.parser.custom.CustomParserConfig;
import cn.qaiu.parser.customjs.JsScriptLoader;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;