diff --git a/parser/src/main/java/cn/qaiu/parser/PanDomainTemplate.java b/parser/src/main/java/cn/qaiu/parser/PanDomainTemplate.java index f66b4fc..a9334ad 100644 --- a/parser/src/main/java/cn/qaiu/parser/PanDomainTemplate.java +++ b/parser/src/main/java/cn/qaiu/parser/PanDomainTemplate.java @@ -247,14 +247,14 @@ public enum PanDomainTemplate { "https://cowtransfer.com/s/{shareKey}", CowTool.class), CT("城通网盘", - compile("https://(?:[a-zA-Z\\d-]+\\.)?(ctfile|545c|u062|ghpym|474b)\\.com/f(ile)?/" + - "(?[0-9a-zA-Z_-]+)(\\?p=(?\\w+))?"), + compile("https?://(?:[a-zA-Z\\d-]+\\.)?(ctfile|545c|u062|ghpym|474b)\\.com/f(ile)?/" + + "(?[0-9a-zA-Z_-]+)/?(?:\\?(?:(?:[^#&]*&)*p=(?\\w+)(?:&[^#]*)?|[^#]*))?"), "https://ctfile.com/file/{shareKey}", CtTool.class), // https://url94.ctfile.com/d/64115194-164803691-48508c?p=7609&d=164803691&fk=decb36 CTD("城通网盘-目录", - compile("https://(?:[a-zA-Z\\d-]+\\.)?(ctfile|545c|u062|ghpym|474b)\\.com/d/" + - "(?[0-9a-zA-Z_-]+)(\\?p=(?\\w+))?"), + compile("https?://(?:[a-zA-Z\\d-]+\\.)?(ctfile|545c|u062|ghpym|474b)\\.com/d/" + + "(?[0-9a-zA-Z_-]+)/?(?:\\?(?:(?:[^#&]*&)*p=(?\\w+)(?:&[^#]*)?|[^#]*))?"), "https://ctfile.com/d/{shareKey}", CtTool.class), // https://www.vyuyun.com/s/QMa6ie?password=I4KG7H diff --git a/parser/src/main/java/cn/qaiu/parser/impl/CeTool.java b/parser/src/main/java/cn/qaiu/parser/impl/CeTool.java index 09f9956..7daac1f 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/CeTool.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/CeTool.java @@ -2,6 +2,7 @@ package cn.qaiu.parser.impl; import cn.qaiu.entity.FileInfo; import cn.qaiu.entity.ShareLinkInfo; +import cn.qaiu.parser.IPanTool; import cn.qaiu.parser.PanBase; import io.vertx.core.Future; import io.vertx.core.buffer.Buffer; @@ -160,6 +161,7 @@ public class CeTool extends PanBase { } catch (Exception e) { log.debug("v3 share API解析失败: {}", e.getMessage()); } + tryV4ShareApi(baseUrl, key, pwd); }).onFailure(t -> { log.debug("v3 share API请求失败: {}", t.getMessage()); // 请求失败,尝试 v4 或下一个解析器 @@ -206,7 +208,8 @@ public class CeTool extends PanBase { */ private void delegateToCe4Tool() { log.debug("检测到Cloudreve 4.x,转发到Ce4Tool处理"); - new Ce4Tool(shareLinkInfo).parse().onComplete(promise); + Ce4Tool ce4Tool = new Ce4Tool(shareLinkInfo); + IPanTool.closeAfter(ce4Tool, ce4Tool::parse).onComplete(promise); } diff --git a/parser/src/main/java/cn/qaiu/parser/impl/CtTool.java b/parser/src/main/java/cn/qaiu/parser/impl/CtTool.java index b075513..c1e2c61 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/CtTool.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/CtTool.java @@ -3,6 +3,7 @@ package cn.qaiu.parser.impl; import cn.qaiu.entity.FileInfo; import cn.qaiu.entity.ShareLinkInfo; import cn.qaiu.parser.PanBase; +import cn.qaiu.util.CommonUtils; import cn.qaiu.util.FileSizeConverter; import io.vertx.core.Future; import io.vertx.core.Promise; @@ -12,6 +13,10 @@ import io.vertx.core.json.JsonObject; import io.vertx.ext.web.client.HttpRequest; import io.vertx.uritemplate.UriTemplate; +import java.net.URI; +import java.net.URLDecoder; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -24,19 +29,22 @@ import java.util.regex.Pattern; */ public class CtTool extends PanBase { private static final String API_URL_PREFIX = "https://webapi.ctfile.com"; + private static final String SHARE_FILE_URL_PREFIX = "https://ctfile.com/file/"; + private static final String AJAX_ACCEPT = "application/json, text/javascript, */*; q=0.01"; + private static final String BROWSER_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"; + private static final int FILE_LIST_PAGE_SIZE = 200; + private static final int MAX_FILE_LIST_PAGES = 50; // https://webapi.ctfile.com/getfile.php?path=f&f=64115194-17569800420720-06c697& // passcode=7609&r=0.6611183001986635&ref=&url=https%3A%2F%2Furl94.ctfile.com%2Ff%2F64115194-17569800420720-06c697%3Fp%3D7609 private static final String API1 = API_URL_PREFIX + "/getfile.php?path={path}" + "&f={shareKey}&passcode={pwd}&r={rand}&ref=&url={url}"; - // https://webapi.ctfile.com/get_file_url.php?uid=64115194&fid=17569800420720&folder_id=0& - // share_id=&file_chk=af5c8757a49cbc69a557eb3da59b246c&start_time=1780471868&wait_seconds=0& - // mb=0&app=0&acheck=1&verifycode=1780471868.2951fe63abedf36ec02f34ed5711ce70&rd=0.36350981353622636 - private static final String API2 = API_URL_PREFIX + "/get_file_url.php?" + - "uid={uid}&fid={fid}&folder_id=0&share_id=&file_chk={file_chk}" + - "&start_time={start_time}&wait_seconds={wait_seconds}&mb=0&app=0&acheck=1" + - "&verifycode={verifycode}&rd={rand}"; + // https://webapi.ctfile.com/get_down_url.php?uid=64115194&fid=17569800420720& + // file_chk=af5c8757a49cbc69a557eb3da59b246c&start_time=1780471868&wait_seconds=0&rd=0.36... + private static final String API2 = API_URL_PREFIX + "/get_down_url.php?" + + "uid={uid}&fid={fid}&file_chk={file_chk}" + + "&start_time={start_time}&wait_seconds={wait_seconds}&rd={rand}"; // https://webapi.ctfile.com/getdir.php?path=d&d=64115194-164803691-48508c& // folder_id=164803691&fk=decb36&passcode=7609&r=0.23...&ref=&url=https://url94.ctfile.com/d/... @@ -44,15 +52,22 @@ public class CtTool extends PanBase { "&d={shareKey}&folder_id={folder_id}&fk={fk}&passcode={pwd}&r={rand}&ref=&url={url}"; // DataTables参数,用于获取目录文件列表 - private static final String FILE_LIST_PARAMS = "&sEcho=1&iColumns=4&sColumns=%2C%2C%2C" + - "&iDisplayStart=0&iDisplayLength=500&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3" + + private static final String FILE_LIST_PARAMS_TEMPLATE = "&sEcho=1&iColumns=4&sColumns=%2C%2C%2C" + + "&iDisplayStart={start}&iDisplayLength={length}" + + "&mDataProp_0=0&sSearch_0=&bRegex_0=false&bSearchable_0=true&bSortable_0=false" + + "&mDataProp_1=1&sSearch_1=&bRegex_1=false&bSearchable_1=true&bSortable_1=true" + + "&mDataProp_2=2&sSearch_2=&bRegex_2=false&bSearchable_2=true&bSortable_2=true" + + "&mDataProp_3=3&sSearch_3=&bRegex_3=false&bSearchable_3=true&bSortable_3=true" + + "&sSearch=&bRegex=false" + "&iSortCol_0=3&sSortDir_0=desc&iSortingCols=1"; // 文件列表HTML解析正则 - private static final Pattern FILE_ID_PATTERN = Pattern.compile("value=\"f(\\d+)\""); - private static final Pattern FILE_HREF_PATTERN = Pattern.compile("href=\"#/f/([^\"]+)\""); - private static final Pattern FILE_NAME_PATTERN = Pattern.compile(">([^<]+)"); - private static final Pattern FILE_ICON_PATTERN = Pattern.compile("alt=\"([^\"]+)\""); + private static final Pattern FILE_ID_PATTERN = Pattern.compile("value=[\"']f(\\d+)[\"']"); + private static final Pattern FOLDER_ID_PATTERN = Pattern.compile("value=[\"']d(\\d+)[\"']"); + private static final Pattern FILE_HREF_PATTERN = Pattern.compile("href=[\"']#/f/([^\"']+)[\"']"); + private static final Pattern FILE_NAME_PATTERN = Pattern.compile("]*>([^<]+)", Pattern.CASE_INSENSITIVE); + private static final Pattern FILE_ICON_PATTERN = Pattern.compile("alt=[\"']([^\"']+)[\"']"); + private static final Pattern SUBDIR_PATTERN = Pattern.compile("load_subdir\\s*\\((\\d+)\\s*,\\s*['\"]([^'\"]+)['\"]\\)"); /** * 子类重写此构造方法不需要添加额外逻辑 @@ -73,81 +88,105 @@ public class CtTool extends PanBase { @Override public Future parse() { final String shareKey = shareLinkInfo.getShareKey(); - if (shareKey.indexOf('-') == -1) { + if (shareKey == null || shareKey.indexOf('-') == -1) { fail("shareKey格式不正确找不到'-': {}", shareKey); return promise.future(); } String[] split = shareKey.split("-"); - String uid = split[0], fid = split[1]; - // 获取url path - int i1 = shareLinkInfo.getShareUrl().indexOf("com/"); - int i2 = shareLinkInfo.getShareUrl().lastIndexOf("/"); - String path = shareLinkInfo.getShareUrl().substring(i1 + 4, i2); + if (split.length < 2 || split[0].isBlank() || split[1].isBlank()) { + fail("shareKey格式不正确: {}", shareKey); + return promise.future(); + } + String fallbackUid = split[0], fallbackFid = split[1]; + String path = extractPath(shareLinkInfo.getShareUrl()); - HttpRequest bufferHttpRequest1 = clientSession.getAbs(UriTemplate.of(API1)) + HttpRequest bufferHttpRequest1 = withCtAjaxHeaders(clientSession.getAbs(UriTemplate.of(API1)) .setTemplateParam("path", path) .setTemplateParam("shareKey", shareKey) .setTemplateParam("pwd", shareLinkInfo.getSharePassword()) .setTemplateParam("rand", String.valueOf(Math.random())) - .setTemplateParam("url", shareLinkInfo.getShareUrl()); + .setTemplateParam("url", shareLinkInfo.getShareUrl()), shareLinkInfo.getShareUrl()); bufferHttpRequest1 .send().onSuccess(res -> { - var resJson = asJson(res); - if (resJson.containsKey("file")) { - var fileJson = resJson.getJsonObject("file"); - if (fileJson.containsKey("file_chk")) { - var file_chk = fileJson.getString("file_chk"); - String startTime = fileJson.getValue("start_time").toString(); - String waitSeconds = fileJson.getValue("wait_seconds").toString(); - String verifycode = fileJson.getString("verifycode"); - - // 提取文件信息并存储 - FileInfo fileInfo = new FileInfo() - .setFileName(fileJson.getString("file_name")) - .setFileId(String.valueOf(fileJson.getLong("file_id", 0L))) - .setSizeStr(fileJson.getString("file_size")) - .setCreateTime(fileJson.getString("file_time")) - .setCreateBy(fileJson.getString("username")) - .setFileType("file") - .setPanType(shareLinkInfo.getType()); - shareLinkInfo.getOtherParam().put("fileInfo", fileInfo); - - HttpRequest bufferHttpRequest2 = clientSession.getAbs(UriTemplate.of(API2)) - .setTemplateParam("uid", uid) - .setTemplateParam("fid", fid) - .setTemplateParam("file_chk", file_chk) - .setTemplateParam("start_time", startTime) - .setTemplateParam("wait_seconds", waitSeconds) - .setTemplateParam("verifycode", verifycode) - .setTemplateParam("rand", String.valueOf(Math.random())); - bufferHttpRequest2 - .send().onSuccess(res2 -> { - JsonObject resJson2 = asJson(res2); - if (resJson2.containsKey("downurl")) { - String downloadUrl = resJson2.getString("downurl"); - - // 存储下载元数据,包括必要的请求头 - Map headers = new HashMap<>(); - headers.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"); - headers.put("Referer", shareLinkInfo.getShareUrl()); - - // 使用新的 completeWithMeta 方法 - completeWithMeta(downloadUrl, headers); - } else { - fail("解析失败, 可能分享已失效: json: {} 字段 {} 不存在", resJson2, "downurl"); - } - }).onFailure(handleFail(bufferHttpRequest1.queryParams().toString())); - } else { - fail("解析失败, file_chk找不到, 可能分享已失效或者分享密码不对: {}", fileJson); + try { + var resJson = asJson(res); + if (resJson == null || resJson.isEmpty()) { + fail("解析失败, 上游返回空响应或非JSON响应"); + return; } - } else { - fail("解析失败, 文件信息为空, 可能分享已失效"); + Object fileValue = resJson.getValue("file"); + if (!(fileValue instanceof JsonObject)) { + fail("解析失败, 文件信息为空或格式错误, 可能分享已失效: {}", resJson); + return; + } + var fileJson = (JsonObject) fileValue; + String uid = resolveDownloadUid(fileJson, fallbackUid); + String fid = resolveDownloadFid(fileJson, fallbackFid); + String fileChk = fileJson.getString("file_chk"); + String startTime = valueToString(fileJson.getValue("start_time")); + String waitSeconds = valueToString(fileJson.getValue("wait_seconds")); + if (uid.isBlank() || fid.isBlank() || fileChk == null || fileChk.isBlank() + || startTime.isBlank() || waitSeconds.isBlank()) { + fail("解析失败, 下载参数不完整, 可能分享已失效或者分享密码不对: {}", fileJson); + return; + } + + // 提取文件信息并存储 + FileInfo fileInfo = new FileInfo() + .setFileName(fileJson.getString("file_name")) + .setFileId(fid) + .setSizeStr(fileJson.getString("file_size")) + .setCreateTime(fileJson.getString("file_time")) + .setCreateBy(fileJson.getString("username")) + .setFileType("file") + .setPanType(shareLinkInfo.getType()); + shareLinkInfo.getOtherParam().put("fileInfo", fileInfo); + + HttpRequest bufferHttpRequest2 = withCtAjaxHeaders(clientSession.getAbs(UriTemplate.of(API2)) + .setTemplateParam("uid", uid) + .setTemplateParam("fid", fid) + .setTemplateParam("file_chk", fileChk) + .setTemplateParam("start_time", startTime) + .setTemplateParam("wait_seconds", waitSeconds) + .setTemplateParam("rand", String.valueOf(Math.random())), shareLinkInfo.getShareUrl()); + bufferHttpRequest2 + .send().onSuccess(res2 -> handleDownloadUrlResponse(res2)) + .onFailure(t -> fail("下载链接请求失败: {}", t.getMessage())); + } catch (Exception e) { + fail("解析失败: {}", e.getMessage()); } - }).onFailure(handleFail(bufferHttpRequest1.queryParams().toString())); + }).onFailure(t -> fail("文件信息请求失败: {}", t.getMessage())); return promise.future(); } + private void handleDownloadUrlResponse(io.vertx.ext.web.client.HttpResponse res) { + try { + JsonObject resJson = asJson(res); + if (resJson == null || resJson.isEmpty()) { + fail("解析失败, 下载接口返回空响应或非JSON响应"); + return; + } + String downloadUrl = resJson.getString("downurl"); + if (downloadUrl == null || downloadUrl.isBlank()) { + fail("解析失败, 可能分享已失效: json: {} 字段 {} 不存在", resJson, "downurl"); + return; + } + + // 存储下载元数据,包括必要的请求头 + Map headers = new HashMap<>(); + headers.put("User-Agent", BROWSER_UA); + if (shareLinkInfo.getShareUrl() != null && !shareLinkInfo.getShareUrl().isBlank()) { + headers.put("Referer", shareLinkInfo.getShareUrl()); + } + + // 使用新的 completeWithMeta 方法 + completeWithMeta(downloadUrl, headers); + } catch (Exception e) { + fail("解析失败, 下载接口响应处理异常: {}", e.getMessage()); + } + } + @Override public Future> parseFileList() { Promise> listPromise = Promise.promise(); @@ -157,122 +196,478 @@ public class CtTool extends PanBase { final String pwd = shareLinkInfo.getSharePassword(); // shareKey格式: uid-folder_id-hash (例如 64115194-164803691-48508c) + if (shareKey == null) { + listPromise.fail(baseMsg() + " shareKey为空"); + return listPromise.future(); + } String[] split = shareKey.split("-"); if (split.length < 2) { listPromise.fail(baseMsg() + " shareKey格式不正确: " + shareKey); return listPromise.future(); } - String folderId = split[1]; + String path = extractPath(shareUrl); + Object dirId = shareLinkInfo.getOtherParam() == null ? null : shareLinkInfo.getOtherParam().get("dirId"); + DirectoryContext directoryContext = resolveDirectoryContext(shareUrl, dirId); - // 从分享URL中提取fk参数 - String fk = extractQueryParam(shareUrl, "fk"); - - // 从URL中提取path (例如从 "https://url94.ctfile.com/d/xxx?p=..." 中提取 "d") - int comIdx = shareUrl.indexOf("com/"); - int qIdx = shareUrl.indexOf('?'); - String pathAndKey = qIdx > 0 ? shareUrl.substring(comIdx + 4, qIdx) : shareUrl.substring(comIdx + 4); - int slashIdx = pathAndKey.indexOf('/'); - String path = slashIdx > 0 ? pathAndKey.substring(0, slashIdx) : pathAndKey; - - clientSession.getAbs(UriTemplate.of(API_GETDIR)) + HttpRequest getDirRequest = withCtAjaxHeaders(clientSession.getAbs(UriTemplate.of(API_GETDIR)) .setTemplateParam("path", path) .setTemplateParam("shareKey", shareKey) - .setTemplateParam("folder_id", folderId) - .setTemplateParam("fk", fk != null ? fk : "") + .setTemplateParam("folder_id", directoryContext.folderId) + .setTemplateParam("fk", directoryContext.folderKey) .setTemplateParam("pwd", pwd != null ? pwd : "") .setTemplateParam("rand", String.valueOf(Math.random())) - .setTemplateParam("url", shareUrl) - .send().onSuccess(res -> { - var resJson = asJson(res); - if (!resJson.containsKey("file")) { - listPromise.fail(baseMsg() + " 目录解析失败: " + resJson.encode()); - return; - } - var dirInfo = resJson.getJsonObject("file"); - String fileListRelUrl = dirInfo.getString("url"); - if (fileListRelUrl == null) { - listPromise.fail(baseMsg() + " 文件列表URL为空"); - return; - } + .setTemplateParam("url", shareUrl), shareUrl); - String fileListUrl = API_URL_PREFIX + fileListRelUrl + FILE_LIST_PARAMS; - clientSession.getAbs(fileListUrl) - .send().onSuccess(res2 -> { - var listJson = asJson(res2); - JsonArray aaData = listJson.getJsonArray("aaData"); - if (aaData == null) { - listPromise.fail(baseMsg() + " 文件列表为空"); - return; - } - List fileList = new ArrayList<>(); - String panType = shareLinkInfo.getType(); - for (int i = 0; i < aaData.size(); i++) { - var row = aaData.getJsonArray(i); - try { - String checkboxHtml = row.getString(0); - String nameCellHtml = row.getString(1); - String sizeStr = row.getString(2).trim(); + getDirRequest.send().onSuccess(res -> { + try { + var resJson = asJson(res); + if (resJson == null || resJson.isEmpty()) { + failListPromise(listPromise, baseMsg() + " 目录解析失败: 上游返回空响应或非JSON响应"); + return; + } + if (!resJson.containsKey("file")) { + failListPromise(listPromise, baseMsg() + " 目录解析失败: " + resJson.encode()); + return; + } + Object dirInfoValue = resJson.getValue("file"); + if (!(dirInfoValue instanceof JsonObject)) { + failListPromise(listPromise, baseMsg() + " 目录解析失败: file字段格式错误: " + resJson.encode()); + return; + } + JsonObject dirInfo = (JsonObject) dirInfoValue; + Object fileListUrlValue = dirInfo.getValue("url"); + String fileListRelUrl = fileListUrlValue instanceof String ? ((String) fileListUrlValue).trim() : ""; + if (fileListRelUrl.isBlank()) { + failListPromise(listPromise, baseMsg() + " " + buildDirectoryFailureMessage(resJson, dirInfo)); + return; + } - // 从checkbox HTML中提取文件ID - String fileId = null; - Matcher idMatcher = FILE_ID_PATTERN.matcher(checkboxHtml); - if (idMatcher.find()) fileId = idMatcher.group(1); - - // 从文件名单元格HTML中提取临时分享key - String fileShareKey = null; - Matcher hrefMatcher = FILE_HREF_PATTERN.matcher(nameCellHtml); - if (hrefMatcher.find()) fileShareKey = hrefMatcher.group(1); - - // 提取文件名 - String fileName = null; - Matcher nameMatcher = FILE_NAME_PATTERN.matcher(nameCellHtml); - if (nameMatcher.find()) fileName = nameMatcher.group(1).trim(); - - // 提取文件图标/类型 - String fileIcon = null; - Matcher iconMatcher = FILE_ICON_PATTERN.matcher(nameCellHtml); - if (iconMatcher.find()) fileIcon = iconMatcher.group(1); - - if (fileName == null || fileShareKey == null) continue; - - long sizeBytes = 0; - try { - sizeBytes = FileSizeConverter.convertToBytes(sizeStr); - } catch (Exception ignored) {} - - FileInfo fileInfo = new FileInfo() - .setFileName(fileName) - .setFileId(fileId) - .setSizeStr(sizeStr) - .setSize(sizeBytes) - .setFileType(fileIcon) - .setFileIcon(fileIcon) - .setPanType(panType) - .setParserUrl(String.format("%s/v2/redirectUrl/%s/%s", - getDomainName(), panType, fileShareKey)); - fileList.add(fileInfo); - } catch (Exception e) { - log.warn("解析文件行失败: {}", e.getMessage()); - } - } - listPromise.complete(fileList); - }).onFailure(listPromise::fail); - }).onFailure(listPromise::fail); + fetchFileListPage(toCtApiUrl(fileListRelUrl), 0, 0, new ArrayList<>(), listPromise, + shareLinkInfo.getType(), getDomainName(), shareUrl, pwd); + } catch (Exception e) { + failListPromise(listPromise, baseMsg() + " 目录解析失败: " + e.getMessage()); + } + }).onFailure(t -> failListPromise(listPromise, t)); return listPromise.future(); } - private String extractQueryParam(String url, String paramName) { - if (url == null) return null; + private void fetchFileListPage(String fileListBaseUrl, int start, int pageIndex, List fileList, + Promise> listPromise, String panType, String domainName, + String shareUrl, String pwd) { + try { + if (pageIndex >= MAX_FILE_LIST_PAGES) { + failListPromise(listPromise, baseMsg() + " 文件列表解析失败: 分页超过最大限制 " + MAX_FILE_LIST_PAGES + + " (start=" + start + ", length=" + FILE_LIST_PAGE_SIZE + ")"); + return; + } + + String fileListUrl = appendQueryParams(fileListBaseUrl, + buildFileListParams(start, FILE_LIST_PAGE_SIZE) + "&_=" + System.currentTimeMillis()); + withCtAjaxHeaders(clientSession.getAbs(fileListUrl), shareUrl) + .send() + .onSuccess(res -> handleFileListPageResponse(fileListBaseUrl, start, pageIndex, fileList, + listPromise, panType, domainName, shareUrl, pwd, res)) + .onFailure(t -> failListPromise(listPromise, t)); + } catch (Exception e) { + failListPromise(listPromise, baseMsg() + " 文件列表解析失败: " + e.getMessage() + + " (start=" + start + ", length=" + FILE_LIST_PAGE_SIZE + ")"); + } + } + + private void handleFileListPageResponse(String fileListBaseUrl, int start, int pageIndex, List fileList, + Promise> listPromise, String panType, String domainName, + String shareUrl, String pwd, io.vertx.ext.web.client.HttpResponse res) { + try { + var listJson = asJson(res); + if (listJson == null || listJson.isEmpty()) { + failListPromise(listPromise, baseMsg() + " 文件列表解析失败: 上游返回空响应或非JSON响应" + + " (start=" + start + ", length=" + FILE_LIST_PAGE_SIZE + ")"); + return; + } + Object aaDataValue = listJson.getValue("aaData"); + if (!(aaDataValue instanceof JsonArray)) { + failListPromise(listPromise, baseMsg() + " 文件列表解析失败: aaData为空: " + listJson.encode()); + return; + } + JsonArray aaData = (JsonArray) aaDataValue; + for (int i = 0; i < aaData.size(); i++) { + try { + Object rowValue = aaData.getValue(i); + if (!(rowValue instanceof JsonArray)) { + log.warn("城通文件列表行格式错误: {}", rowValue); + continue; + } + FileInfo fileInfo = parseFileListRow((JsonArray) rowValue, panType, + domainName, shareUrl, pwd); + if (fileInfo != null) { + fileList.add(fileInfo); + } + } catch (Exception e) { + log.warn("解析文件行失败: {}", e.getMessage()); + } + } + + int nextStart = start + aaData.size(); + int total = parseFileListTotal(listJson); + if (isUnexpectedEmptyFileListPage(start, aaData.size(), total)) { + failListPromise(listPromise, baseMsg() + " 文件列表解析失败: 上游返回空分页" + + " (start=" + start + ", total=" + total + ")"); + return; + } + if (shouldFetchNextFileListPage(start, aaData.size(), total)) { + fetchFileListPage(fileListBaseUrl, nextStart, pageIndex + 1, fileList, + listPromise, panType, domainName, shareUrl, pwd); + } else { + completeListPromise(listPromise, fileList); + } + } catch (Exception e) { + failListPromise(listPromise, baseMsg() + " 文件列表解析失败: " + e.getMessage() + + " (start=" + start + ", length=" + FILE_LIST_PAGE_SIZE + ")"); + } + } + + @Override + public Future parseById() { + Object paramValue = shareLinkInfo.getOtherParam().get("paramJson"); + if (!(paramValue instanceof JsonObject)) { + Promise parsePromise = Promise.promise(); + parsePromise.fail(baseMsg() + " 缺少下载参数paramJson"); + return parsePromise.future(); + } + JsonObject paramJson = (JsonObject) paramValue; + if (!applyFileParam(shareLinkInfo, paramJson)) { + Promise parsePromise = Promise.promise(); + parsePromise.fail(baseMsg() + " 下载参数id为空"); + return parsePromise.future(); + } + return parse(); + } + + static boolean applyFileParam(ShareLinkInfo shareLinkInfo, JsonObject paramJson) { + String fileShareKey = paramJson.getString("id"); + if (fileShareKey == null || fileShareKey.isBlank()) { + return false; + } + shareLinkInfo.setSharePassword(paramJson.getString("pwd", "")); + shareLinkInfo.setShareKey(fileShareKey); + shareLinkInfo.setShareUrl(SHARE_FILE_URL_PREFIX + fileShareKey); + shareLinkInfo.setStandardUrl(SHARE_FILE_URL_PREFIX + fileShareKey); + return true; + } + + static String resolveDownloadUid(JsonObject fileJson, String fallbackUid) { + return firstNonBlank(valueToString(fileJson.getValue("userid")), fallbackUid); + } + + static String resolveDownloadFid(JsonObject fileJson, String fallbackFid) { + return firstNonBlank(valueToString(fileJson.getValue("file_id")), fallbackFid); + } + + private HttpRequest withCtAjaxHeaders(HttpRequest request, String shareUrl) { + request.putHeader("User-Agent", BROWSER_UA) + .putHeader("Accept", AJAX_ACCEPT) + .putHeader("X-Requested-With", "XMLHttpRequest"); + if (shareUrl != null && !shareUrl.isBlank()) { + request.putHeader("Referer", shareUrl); + } + String origin = extractOrigin(shareUrl); + if (!origin.isBlank()) { + request.putHeader("Origin", origin); + } + return request; + } + + static FileInfo parseFileListRow(JsonArray row, String panType, String domainName, String shareUrl, String pwd) { + if (row == null || row.size() < 2) { + return null; + } + String checkboxHtml = rowString(row, 0); + String nameCellHtml = rowString(row, 1); + String sizeStr = rowString(row, 2).trim(); + String dateStr = rowString(row, 3).trim(); + if (nameCellHtml.isBlank()) { + return null; + } + + String fileName = matchFirst(FILE_NAME_PATTERN, nameCellHtml); + String fileIcon = matchFirst(FILE_ICON_PATTERN, nameCellHtml); + if (fileName == null || fileName.isBlank()) { + return null; + } + + Matcher subdirMatcher = SUBDIR_PATTERN.matcher(nameCellHtml); + boolean hasSubdirCall = subdirMatcher.find(); + if (hasSubdirCall || "folder".equalsIgnoreCase(fileIcon)) { + String folderId = hasSubdirCall ? subdirMatcher.group(1) : null; + String folderKey = hasSubdirCall ? subdirMatcher.group(2) : ""; + if (folderId == null) { + folderId = matchFirst(FOLDER_ID_PATTERN, checkboxHtml); + } + if (folderId == null || folderId.isBlank()) { + return null; + } + String dirId = folderId + ":" + folderKey; + FileInfo fileInfo = new FileInfo() + .setFileName(fileName.trim()) + .setFileId(folderId) + .setSize(0L) + .setSizeStr(sizeStr.isBlank() ? "0B" : sizeStr) + .setFileType("folder") + .setFileIcon(fileIcon) + .setPanType(panType) + .setParserUrl(buildFolderParserUrl(domainName, shareUrl, dirId, pwd)); + if (!dateStr.isBlank()) { + fileInfo.setCreateTime(dateStr).setUpdateTime(dateStr); + } + return fileInfo; + } + + String fileShareKey = matchFirst(FILE_HREF_PATTERN, nameCellHtml); + if (fileShareKey == null || fileShareKey.isBlank()) { + return null; + } + String fileId = matchFirst(FILE_ID_PATTERN, checkboxHtml); + JsonObject paramJson = new JsonObject() + .put("id", fileShareKey) + .put("fileName", fileName.trim()) + .put("pwd", pwd == null ? "" : pwd); + String param = CommonUtils.urlBase64Encode(paramJson.encode()); + long sizeBytes = 0; + try { + sizeBytes = sizeStr.isBlank() ? 0 : FileSizeConverter.convertToBytes(sizeStr); + } catch (Exception ignored) { + } + + FileInfo fileInfo = new FileInfo() + .setFileName(fileName.trim()) + .setFileId(fileId) + .setSizeStr(sizeStr) + .setSize(sizeBytes) + .setFileType(fileIcon != null ? fileIcon : "file") + .setFileIcon(fileIcon) + .setPanType(panType) + .setParserUrl(String.format("%s/v2/redirectUrl/%s/%s", + domainName, panType, param)); + if (!dateStr.isBlank()) { + fileInfo.setCreateTime(dateStr).setUpdateTime(dateStr); + } + return fileInfo; + } + + private static String buildFolderParserUrl(String domainName, String shareUrl, String dirId, String pwd) { + String url = String.format("%s/v2/getFileList?url=%s&dirId=%s", + domainName, urlEncode(shareUrl), urlEncode(dirId)); + if (pwd != null && !pwd.isBlank()) { + url += "&pwd=" + urlEncode(pwd); + } + return url; + } + + static String extractQueryParam(String url, String paramName) { + if (url == null || paramName == null) return null; int qIdx = url.indexOf('?'); if (qIdx < 0) return null; String query = url.substring(qIdx + 1); + int fragmentIdx = query.indexOf('#'); + if (fragmentIdx >= 0) { + query = query.substring(0, fragmentIdx); + } for (String param : query.split("&")) { int eqIdx = param.indexOf('='); - if (eqIdx > 0 && param.substring(0, eqIdx).equals(paramName)) { - return param.substring(eqIdx + 1); + if (eqIdx > 0 && urlDecode(param.substring(0, eqIdx)).equals(paramName)) { + return urlDecode(param.substring(eqIdx + 1)); } } return null; } + + static String extractPath(String shareUrl) { + if (shareUrl == null) { + return ""; + } + int comIdx = shareUrl.indexOf("com/"); + if (comIdx < 0) { + return ""; + } + + int pathStart = comIdx + 4; + int pathEnd = shareUrl.indexOf('/', pathStart); + if (pathEnd < 0) { + pathEnd = shareUrl.indexOf('?', pathStart); + } + if (pathEnd < 0) { + pathEnd = shareUrl.length(); + } + return shareUrl.substring(pathStart, pathEnd); + } + + static String extractFolderKey(String shareUrl) { + return trimToEmpty(extractQueryParam(shareUrl, "fk")); + } + + static DirectoryContext resolveDirectoryContext(String shareUrl, Object dirIdObj) { + String dirId = dirIdObj == null ? "" : urlDecode(String.valueOf(dirIdObj).trim()); + if (!dirId.isBlank()) { + String[] split = dirId.split(":", 2); + return new DirectoryContext(trimToDefault(split[0], "undefined"), + split.length > 1 ? trimToEmpty(split[1]) : ""); + } + + String queryFolderId = firstNonBlank(extractQueryParam(shareUrl, "folder_id"), extractQueryParam(shareUrl, "d")); + String queryFk = extractFolderKey(shareUrl); + if (!queryFolderId.isBlank() || !queryFk.isBlank()) { + return new DirectoryContext(trimToDefault(queryFolderId, "undefined"), queryFk); + } + return new DirectoryContext("undefined", ""); + } + + static String buildDirectoryFailureMessage(JsonObject resJson, JsonObject dirInfo) { + String code = valueToString(resJson.getValue("code")); + String message = valueToString(dirInfo.getValue("message")); + if (message != null && !message.isBlank()) { + return "目录解析失败: " + message + " (code=" + code + ")"; + } + if ("423".equals(code)) { + return "目录解析失败: 需要访问密码或该分享受限 (code=423)"; + } + return "目录解析失败: 文件列表URL为空, 上游响应: " + resJson.encode(); + } + + static String buildFileListParams(int start, int length) { + return FILE_LIST_PARAMS_TEMPLATE + .replace("{start}", String.valueOf(Math.max(0, start))) + .replace("{length}", String.valueOf(Math.max(1, length))); + } + + static int parseFileListTotal(JsonObject listJson) { + int displayTotal = parseInteger(listJson.getValue("iTotalDisplayRecords"), -1); + return displayTotal >= 0 ? displayTotal : parseInteger(listJson.getValue("iTotalRecords"), -1); + } + + static boolean shouldFetchNextFileListPage(int start, int rowCount, int total) { + if (rowCount <= 0) { + return false; + } + int fetchedThrough = start + rowCount; + return total < 0 ? rowCount >= FILE_LIST_PAGE_SIZE : fetchedThrough < total; + } + + static boolean isUnexpectedEmptyFileListPage(int start, int rowCount, int total) { + return total >= 0 && start < total && rowCount <= 0; + } + + private static int parseInteger(Object value, int defaultValue) { + if (value instanceof Number) { + return ((Number) value).intValue(); + } + if (value == null) { + return defaultValue; + } + try { + return Integer.parseInt(value.toString()); + } catch (NumberFormatException e) { + return defaultValue; + } + } + + private static void failListPromise(Promise> listPromise, String message) { + if (!listPromise.future().isComplete()) { + listPromise.fail(message); + } + } + + private static void failListPromise(Promise> listPromise, Throwable throwable) { + if (!listPromise.future().isComplete()) { + listPromise.fail(throwable); + } + } + + private static void completeListPromise(Promise> listPromise, List fileList) { + if (!listPromise.future().isComplete()) { + listPromise.complete(fileList); + } + } + + private static String toCtApiUrl(String url) { + if (url.startsWith("http://") || url.startsWith("https://")) { + return url; + } + return API_URL_PREFIX + url; + } + + private static String appendQueryParams(String url, String params) { + String normalizedParams = params != null && params.startsWith("&") ? params.substring(1) : params; + return url + (url.contains("?") ? "&" : "?") + normalizedParams; + } + + private static String rowString(JsonArray row, int index) { + if (row == null || index >= row.size()) { + return ""; + } + return valueToString(row.getValue(index)); + } + + private static String valueToString(Object value) { + return value == null ? "" : value.toString(); + } + + private static String matchFirst(Pattern pattern, String text) { + if (text == null) { + return null; + } + Matcher matcher = pattern.matcher(text); + return matcher.find() ? matcher.group(1) : null; + } + + private static String firstNonBlank(String first, String second) { + return !trimToEmpty(first).isBlank() ? trimToEmpty(first) : trimToEmpty(second); + } + + private static String trimToDefault(String value, String defaultValue) { + String result = trimToEmpty(value); + return result.isBlank() ? defaultValue : result; + } + + private static String trimToEmpty(String value) { + return value == null ? "" : value.trim(); + } + + private static String urlEncode(String value) { + return URLEncoder.encode(value == null ? "" : value, StandardCharsets.UTF_8); + } + + private static String urlDecode(String value) { + if (value == null) { + return ""; + } + try { + return URLDecoder.decode(value, StandardCharsets.UTF_8); + } catch (Exception e) { + return value; + } + } + + private static String extractOrigin(String shareUrl) { + try { + URI uri = URI.create(shareUrl); + if (uri.getScheme() == null || uri.getHost() == null) { + return ""; + } + String origin = uri.getScheme() + "://" + uri.getHost(); + return uri.getPort() > 0 ? origin + ":" + uri.getPort() : origin; + } catch (Exception e) { + return ""; + } + } + + static final class DirectoryContext { + final String folderId; + final String folderKey; + + DirectoryContext(String folderId, String folderKey) { + this.folderId = folderId; + this.folderKey = folderKey; + } + } } diff --git a/parser/src/main/java/cn/qaiu/parser/impl/FcTool.java b/parser/src/main/java/cn/qaiu/parser/impl/FcTool.java index fb356da..886bc80 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/FcTool.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/FcTool.java @@ -25,6 +25,10 @@ public class FcTool extends PanBase { private static final String DOWN_REQUEST_URL = "https://v2.fangcloud.cn/apps/files/download?file_id={fid}" + "&scenario=share&unique_name={uname}"; + // 静态编译的正则表达式,避免每次调用都重新编译 + private static final Pattern REQUEST_TOKEN_PATTERN = Pattern.compile("name=\"requesttoken\"\\s+value=\"([a-zA-Z0-9_+=]+)\""); + private static final Pattern TYPED_ID_PATTERN = Pattern.compile("id=\"typed_id\"\\s+value=\"file_(\\d+)\""); + public FcTool(ShareLinkInfo shareLinkInfo) { super(shareLinkInfo); } @@ -41,8 +45,7 @@ public class FcTool extends PanBase { if (StringUtils.isNotEmpty(pwd)) { // 获取requesttoken String html = res.bodyAsString(); - Pattern compile = Pattern.compile("name=\"requesttoken\"\\s+value=\"([a-zA-Z0-9_+=]+)\""); - Matcher matcher = compile.matcher(html); + Matcher matcher = REQUEST_TOKEN_PATTERN.matcher(html); if (!matcher.find()) { fail(SHARE_URL_PREFIX + " 未匹配到加密分享的密码输入页面的requesttoken"); return; @@ -71,8 +74,7 @@ public class FcTool extends PanBase { WebClientSession sClient) { // 从HTML中找到文件id String html = res.bodyAsString(); - Pattern compile = Pattern.compile("id=\"typed_id\"\\s+value=\"file_(\\d+)\""); - Matcher matcher = compile.matcher(html); + Matcher matcher = TYPED_ID_PATTERN.matcher(html); if (!matcher.find()) { fail(SHARE_URL_PREFIX + " 未匹配到文件id(typed_id)"); return; diff --git a/parser/src/main/java/cn/qaiu/parser/impl/FjTool.java b/parser/src/main/java/cn/qaiu/parser/impl/FjTool.java index 6b991b4..d3092f4 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/FjTool.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/FjTool.java @@ -169,8 +169,13 @@ public class FjTool extends PanBase { // 文件Id JsonObject fileInfo = resJson.getJsonArray("list").getJsonObject(0); + JsonArray fileListArray = fileInfo.getJsonArray("fileList"); + if (fileListArray == null || fileListArray.isEmpty()) { + fail(FIRST_REQUEST_URL + " 文件列表为空: " + fileInfo); + return; + } // 如果是目录返回目录ID - JsonObject fileList = fileInfo.getJsonArray("fileList").getJsonObject(0); + JsonObject fileList = fileListArray.getJsonObject(0); if (fileList.getInteger("fileType") == 2) { promise.complete(fileList.getInteger("folderId").toString()); return; diff --git a/parser/src/main/java/cn/qaiu/parser/impl/GenShortUrl.java b/parser/src/main/java/cn/qaiu/parser/impl/GenShortUrl.java index 45baacc..c814f10 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/GenShortUrl.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/GenShortUrl.java @@ -31,10 +31,12 @@ public class GenShortUrl extends PanBase { private static final String WRAPPER_URL = "https://www.so.com/link?m=ewgUSYiFWXIoTybC3fJH8YoJy8y10iRquo6cazgINwWjTn3HvVJ92TrCJu0PmMUR0RMDfOAucP3wa4G8j64SrhNH9Z0Cr0PEyn9ASuvpkUGmAjjUEGJkO5%2BIDGWVrEkPHsL7UsoKO6%2BlT%2BD6r&ccc="; private static final String MID = "5095144728824883"; // 微博的mid + private static final Pattern SHORT_URL_PATTERN = Pattern.compile("(https?)://t.cn/\\w+"); + private static final Pattern COMMENT_ID_PATTERN = Pattern.compile("comment_id=\"(\\d+)\""); + private static final MultiMap HEADER = HeadersMultiMap.headers() .add("Content-Type", "application/x-www-form-urlencoded") .add("Referer", "https://www.weibo.com") - .add("Content-Type", "application/x-www-form-urlencoded") .add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"); Cookie cookie = new DefaultCookie("SUB", "_2A25KJE5vDeRhGeRJ6lsR9SjJzDuIHXVpWM-nrDV8PUJbkNAbLVPlkW1NUmJm3GjYtRHBsHdMUKafkdTL_YheMEmu"); @@ -64,11 +66,12 @@ public class GenShortUrl extends PanBase { String shortUrl = extractShortUrl(comment); if (shortUrl != null) { log.info("生成的短链:{}", shortUrl); + // 先完成 promise,返回短链 + promise.complete(shortUrl); + // 异步清理评论(best-effort,不影响结果) String commentId = extractCommentId(comment); if (commentId != null) { deleteComment(commentId); - } else { - promise.fail("未能提取评论ID"); } } else { promise.fail("未能生成短链"); @@ -103,8 +106,7 @@ public class GenShortUrl extends PanBase { } private String extractShortUrl(String comment) { - Pattern pattern = Pattern.compile("(https?)://t.cn/\\w+"); - Matcher matcher = pattern.matcher(comment); + Matcher matcher = SHORT_URL_PATTERN.matcher(comment); if (matcher.find()) { return matcher.group(0); } @@ -112,8 +114,7 @@ public class GenShortUrl extends PanBase { } private String extractCommentId(String comment) { - Pattern pattern = Pattern.compile("comment_id=\"(\\d+)\""); - Matcher matcher = pattern.matcher(comment); + Matcher matcher = COMMENT_ID_PATTERN.matcher(comment); if (matcher.find()) { return matcher.group(1); } diff --git a/parser/src/main/java/cn/qaiu/parser/impl/IzSelectorTool.java b/parser/src/main/java/cn/qaiu/parser/impl/IzSelectorTool.java index 7e3b731..0abfe23 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/IzSelectorTool.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/IzSelectorTool.java @@ -52,4 +52,14 @@ public class IzSelectorTool implements IPanTool { public Future parseById() { return selectedTool.parseById(); } + + @Override + public ShareLinkInfo getShareLinkInfo() { + return selectedTool.getShareLinkInfo(); + } + + @Override + public void close() { + IPanTool.closeQuietly(selectedTool); + } } diff --git a/parser/src/main/java/cn/qaiu/parser/impl/IzTool.java b/parser/src/main/java/cn/qaiu/parser/impl/IzTool.java index c135845..6565fd4 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/IzTool.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/IzTool.java @@ -443,16 +443,75 @@ public class IzTool extends PanBase { } } - private void down(HttpResponse res2) { - MultiMap headers = res2.headers(); - if (!headers.contains("Location") || StringUtils.isBlank(headers.get("Location"))) { - fail("找不到下载链接可能服务器已被禁止或者配置的认证信息有误"); - return; - } - promise.complete(headers.get("Location")); - } - - // 目录解析 + private void down(HttpResponse res2) { + MultiMap headers = res2.headers(); + String location = headers.get("Location"); + if (StringUtils.isBlank(location)) { + fail("{}", buildMissingLocationMessage(res2)); + return; + } + promise.complete(location); + } + + private String buildMissingLocationMessage(HttpResponse response) { + StringBuilder message = new StringBuilder("未获取到下载重定向地址"); + message.append(", HTTP ").append(response.statusCode()); + + String body = null; + try { + body = asText(response); + } catch (Exception e) { + body = "<响应体读取失败: " + e.getMessage() + ">"; + } + + if (StringUtils.isNotBlank(body)) { + try { + JsonObject json = new JsonObject(body); + String upstreamMsg = json.getString("msg"); + Object code = json.getValue("code"); + if (StringUtils.isNotBlank(upstreamMsg)) { + message.append(", 上游返回: ").append(upstreamMsg); + if (code != null) { + message.append(" (code=").append(code).append(")"); + } + } else { + message.append(", 响应体: ").append(previewBody(body)); + } + } catch (Exception ignored) { + message.append(", 响应体: ").append(previewBody(body)); + } + } else { + message.append(", 响应体为空"); + } + + Object fileName = shareLinkInfo.getOtherParam().get("fileName"); + Object fileSize = shareLinkInfo.getOtherParam().get("fileSizeFormat"); + if (fileName != null) { + message.append(", 文件: ").append(fileName); + } + if (fileSize != null) { + message.append(", 大小: ").append(fileSize); + } + if (!hasConfiguredAuth()) { + message.append(", 当前为免登录解析,上游可能要求登录、会员或人工处理"); + } + return message.toString(); + } + + private boolean hasConfiguredAuth() { + Object authObj = shareLinkInfo.getOtherParam().get("auths"); + if (!(authObj instanceof MultiMap auths)) { + return false; + } + return StringUtils.isNotBlank(auths.get("username")) && StringUtils.isNotBlank(auths.get("password")); + } + + private String previewBody(String body) { + int maxLength = 500; + return body.length() <= maxLength ? body : body.substring(0, maxLength) + "..."; + } + + // 目录解析 @Override public Future> parseFileList() { Promise> promise = Promise.promise(); diff --git a/parser/src/main/java/cn/qaiu/parser/impl/IzToolWithAuth.java b/parser/src/main/java/cn/qaiu/parser/impl/IzToolWithAuth.java index 074d506..f8829aa 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/IzToolWithAuth.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/IzToolWithAuth.java @@ -414,11 +414,70 @@ public class IzToolWithAuth extends PanBase { private void down(HttpResponse res2) { MultiMap headers = res2.headers(); - if (!headers.contains("Location") || StringUtils.isBlank(headers.get("Location"))) { - fail("找不到下载链接可能服务器已被禁止或者配置的认证信息有误"); + String location = headers.get("Location"); + if (StringUtils.isBlank(location)) { + fail("{}", buildMissingLocationMessage(res2)); return; } - promise.complete(headers.get("Location")); + promise.complete(location); + } + + private String buildMissingLocationMessage(HttpResponse response) { + StringBuilder message = new StringBuilder("未获取到下载重定向地址"); + message.append(", HTTP ").append(response.statusCode()); + + String body = null; + try { + body = asText(response); + } catch (Exception e) { + body = "<响应体读取失败: " + e.getMessage() + ">"; + } + + if (StringUtils.isNotBlank(body)) { + try { + JsonObject json = new JsonObject(body); + String upstreamMsg = json.getString("msg"); + Object code = json.getValue("code"); + if (StringUtils.isNotBlank(upstreamMsg)) { + message.append(", 上游返回: ").append(upstreamMsg); + if (code != null) { + message.append(" (code=").append(code).append(")"); + } + } else { + message.append(", 响应体: ").append(previewBody(body)); + } + } catch (Exception ignored) { + message.append(", 响应体: ").append(previewBody(body)); + } + } else { + message.append(", 响应体为空"); + } + + Object fileName = shareLinkInfo.getOtherParam().get("fileName"); + Object fileSize = shareLinkInfo.getOtherParam().get("fileSizeFormat"); + if (fileName != null) { + message.append(", 文件: ").append(fileName); + } + if (fileSize != null) { + message.append(", 大小: ").append(fileSize); + } + if (!hasConfiguredAuth()) { + message.append(", 当前为免登录解析,上游可能要求登录、会员或人工处理"); + } + return message.toString(); + } + + private boolean hasConfiguredAuth() { + Object authObj = shareLinkInfo.getOtherParam().get("auths"); + if (!(authObj instanceof MultiMap auths)) { + return false; + } + return StringUtils.isNotBlank(auths.get("username")) && StringUtils.isNotBlank(auths.get("password")); + } + + private String previewBody(String body) { + int maxLength = 500; + return body.length() <= maxLength ? body : body.substring(0, maxLength) + "..."; } // 目录解析 diff --git a/parser/src/main/java/cn/qaiu/parser/impl/LzTool.java b/parser/src/main/java/cn/qaiu/parser/impl/LzTool.java index cfa3b2f..9f0f2f6 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/LzTool.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/LzTool.java @@ -15,6 +15,8 @@ import org.openjdk.nashorn.api.scripting.ScriptObjectMirror; import javax.script.ScriptException; import java.net.MalformedURLException; +import java.time.LocalDate; +import java.time.LocalDateTime; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -31,6 +33,20 @@ public class LzTool extends PanBase { WebClientSession webClientSession = WebClientSession.create(clientNoRedirects); public static final String SHARE_URL_PREFIX = "https://w1.lanzn.com/"; + + // 静态编译的正则表达式,避免每次调用都重新编译 + private static final Pattern FILE_NAME_PATTERN = Pattern.compile("padding: 56px 0px 20px 0px;\">(.*?)<|filenajax\">(.*?)<"); + private static final Pattern FILE_SIZE_PATTERN = Pattern.compile(">文件大小:(.*?)
|\"n_filesize\">大小:(.*?)"); + private static final Pattern SHARE_USER_PATTERN = Pattern.compile(">分享用户:(.*?)|获取(.*?)的文件|\"user-name\">(.*?)
(.*?)|class=\"n_box_des\">(.*?)"); + private static final Pattern FILE_ID_PATTERN = Pattern.compile("\\?f=(.*?)&|fid = (.*?);"); + private static final Pattern CREATE_TIME_PATTERN = Pattern.compile(">上传时间:(.*?)<"); + private static final Pattern URL_DATE_PATTERN = Pattern.compile("(\\d{4}/\\d{1,2}/\\d{1,2})"); + private static final Pattern ARG1_PATTERN = Pattern.compile("var arg1='([^']+)'"); + private static final Pattern IFRAME_SRC_PATTERN = Pattern.compile("src=\"(/fn\\?[a-zA-Z\\d_+/=]{16,})\""); + private static final Pattern RELATIVE_TIME_PATTERN = Pattern.compile("^(\\d+|几)\\s*(分钟|小时)前$"); + private static final Pattern DATE_PATTERN = Pattern.compile("^(\\d{4})\\s*[-/年]\\s*(\\d{1,2})\\s*[-/月]\\s*(\\d{1,2})\\s*日?$"); + private static final Pattern MONTH_DAY_PATTERN = Pattern.compile("^(\\d{1,2})\\s*月\\s*(\\d{1,2})\\s*日?$"); MultiMap headers0 = HeaderUtils.parseHeaders(""" Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7 Accept-Encoding: gzip, deflate @@ -62,19 +78,30 @@ public class LzTool extends PanBase { client.getAbs(sUrl) .putHeaders(headers0) .send().onSuccess(res -> { - String html = asText(res); - if (html.contains("var arg1='")) { - webClientSession = WebClientSession.create(clientNoRedirects); - setCookie(html, sUrl); - webClientSession.getAbs(sUrl) - .putHeaders(headers0) - .send().onSuccess(res2 -> { - String html2 = asText(res2); - doParser(html2, pwd, sUrl); - }); + try { + String html = asText(res); + if (hasAcwArg1(html)) { + webClientSession = WebClientSession.create(clientNoRedirects); + if (!setCookie(html, sUrl)) { + fail("蓝奏云反爬 arg1 Cookie 解析失败,页面内容异常"); + return; + } + webClientSession.getAbs(sUrl) + .putHeaders(headers0) + .send().onSuccess(res2 -> { + try { + String html2 = asText(res2); + doParser(html2, pwd, sUrl); + } catch (Exception e) { + fail("蓝奏云页面响应处理异常: {}", e.getMessage()); + } + }).onFailure(handleFail(sUrl)); - } else { - doParser(html, pwd, sUrl); + } else { + doParser(html, pwd, sUrl); + } + } catch (Exception e) { + fail("蓝奏云页面响应处理异常: {}", e.getMessage()); } }).onFailure(handleFail(sUrl)); @@ -82,22 +109,41 @@ public class LzTool extends PanBase { } private void doParser(String html, String pwd, String sUrl) { + if (html == null || html.isBlank()) { + fail("蓝奏云页面响应为空"); + return; + } + if (isShareCancelledPage(html)) { + fail("分享已失效或文件已取消分享"); + return; + } // 检测是否为目录分享链接 (含 /s/、/b/ 路径段或 b 开头的路径段) if (sUrl.matches(".*/(s|b)/[^/]+.*") || sUrl.matches(".*/b[^/]+.*")) { fail("该链接为蓝奏云目录分享,请使用目录解析接口"); return; } // 若仍是校验页 (parse()中cookie域名与实际URL不匹配时会出现), 重试一次 - if (html.contains("var arg1='")) { + if (hasAcwArg1(html)) { webClientSession = WebClientSession.create(clientNoRedirects); - setCookie(html, sUrl); + if (!setCookie(html, sUrl)) { + fail("蓝奏云反爬 arg1 Cookie 解析失败,页面内容异常"); + return; + } webClientSession.getAbs(sUrl).putHeaders(headers0).send().onSuccess(res -> { - String html2 = asText(res); - if (html2.contains("var arg1='")) { - fail("蓝奏云反爬校验失败,请稍后重试"); - return; + try { + String html2 = asText(res); + if (isShareCancelledPage(html2)) { + fail("分享已失效或文件已取消分享"); + return; + } + if (hasAcwArg1(html2)) { + fail("蓝奏云反爬校验失败,请稍后重试"); + return; + } + doParserInternal(html2, pwd, sUrl); + } catch (Exception e) { + fail("蓝奏云页面响应处理异常: {}", e.getMessage()); } - doParserInternal(html2, pwd, sUrl); }).onFailure(handleFail(sUrl)); return; } @@ -105,14 +151,21 @@ public class LzTool extends PanBase { } private void doParserInternal(String html, String pwd, String sUrl) { + if (html == null || html.isBlank()) { + fail("蓝奏云页面响应为空"); + return; + } + if (isShareCancelledPage(html)) { + fail("分享已失效或文件已取消分享"); + return; + } try { setFileInfo(html, shareLinkInfo); } catch (Exception e) { log.error("文件信息解析异常", e); } // 匹配iframe - Pattern compile = Pattern.compile("src=\"(/fn\\?[a-zA-Z\\d_+/=]{16,})\""); - Matcher matcher = compile.matcher(html); + Matcher matcher = IFRAME_SRC_PATTERN.matcher(html); // 没有Iframe说明是加密分享, 匹配sign通过密码请求下载页面 if (!matcher.find()) { try { @@ -126,46 +179,64 @@ public class LzTool extends PanBase { // 没有密码 String iframePath = matcher.group(1); String absoluteURI = SHARE_URL_PREFIX + iframePath; - webClientSession.getAbs(absoluteURI).putHeaders(headers0).send().onSuccess(res2 -> { - String html2 = asText(res2); - String jsText = getJsText(html2); - if (jsText == null) { - headers0.add("Referer", absoluteURI); - setCookie(html2, absoluteURI); - webClientSession.getAbs(absoluteURI).send().onSuccess(res3 -> { - String html3 = asText(res3); - String jsText3 = getJsText(html3); - if (jsText3 != null) { - try { - ScriptObjectMirror scriptObjectMirror = JsExecUtils.executeDynamicJs(jsText3, null); - getDownURL(sUrl, scriptObjectMirror); - } catch (ScriptException | NoSuchMethodException e) { - fail(e, "引擎执行失败"); - } - } else { - fail(SHARE_URL_PREFIX + iframePath + " -> " + sUrl + ": 获取失败0, 可能分享已失效"); - } - }); - } else { - try { - ScriptObjectMirror scriptObjectMirror = JsExecUtils.executeDynamicJs(jsText, null); - getDownURL(sUrl, scriptObjectMirror); - } catch (ScriptException | NoSuchMethodException e) { - fail(e, "js引擎执行失败"); + // 创建局部副本,避免修改实例字段导致累积 + MultiMap headersCopy = MultiMap.caseInsensitiveMultiMap().addAll(headers0); + headersCopy.add("Referer", absoluteURI); + webClientSession.getAbs(absoluteURI).putHeaders(headersCopy).send().onSuccess(res2 -> { + try { + String html2 = asText(res2); + if (isShareCancelledPage(html2)) { + fail("分享已失效或文件已取消分享"); + return; } + String jsText = getJsText(html2); + if (jsText == null) { + if (!setCookie(html2, absoluteURI)) { + fail("蓝奏云反爬 arg1 Cookie 解析失败,页面内容异常"); + return; + } + webClientSession.getAbs(absoluteURI).send().onSuccess(res3 -> { + try { + String html3 = asText(res3); + if (isShareCancelledPage(html3)) { + fail("分享已失效或文件已取消分享"); + return; + } + String jsText3 = getJsText(html3); + if (jsText3 != null) { + try { + ScriptObjectMirror scriptObjectMirror = JsExecUtils.executeDynamicJs(jsText3, null); + getDownURL(sUrl, scriptObjectMirror); + } catch (ScriptException | NoSuchMethodException e) { + fail(e, "引擎执行失败"); + } + } else { + fail(SHARE_URL_PREFIX + iframePath + " -> " + sUrl + ": 获取失败0, 可能分享已失效"); + } + } catch (Exception e) { + fail("蓝奏云 iframe 响应处理异常: {}", e.getMessage()); + } + }).onFailure(handleFail(absoluteURI)); + } else { + try { + ScriptObjectMirror scriptObjectMirror = JsExecUtils.executeDynamicJs(jsText, null); + getDownURL(sUrl, scriptObjectMirror); + } catch (ScriptException | NoSuchMethodException e) { + fail(e, "js引擎执行失败"); + } + } + } catch (Exception e) { + fail("蓝奏云 iframe 响应处理异常: {}", e.getMessage()); } }).onFailure(handleFail(SHARE_URL_PREFIX)); } } - private void setCookie(String html, String url) { - int beginIndex = html.indexOf("arg1='") + 6; - int endIndex = html.indexOf("';", beginIndex); - if (beginIndex < 6 || endIndex == -1 || endIndex <= beginIndex) { - fail("蓝奏云反爬 arg1 Cookie 解析失败,页面内容异常"); - return; + private boolean setCookie(String html, String url) { + String arg1 = extractAcwArg1(html); + if (arg1 == null) { + return false; } - String arg1 = html.substring(beginIndex, endIndex); String acw_sc__v2 = AcwScV2Generator.acwScV2Simple(arg1); // 从 URL 中动态提取域名(如 lanzoum.com, lanzoux.com 等) String domain = ".lanzn.com"; // 默认兜底 @@ -184,6 +255,7 @@ public class LzTool extends PanBase { nettyCookie.setSecure(false); nettyCookie.setHttpOnly(false); webClientSession.cookieStore().put(nettyCookie); + return true; } private String getJsByPwd(String pwd, String html, String subText) { @@ -201,6 +273,9 @@ public class LzTool extends PanBase { } private String getJsText(String html) { + if (html == null) { + return null; + } String jsTagStart = ""; int index = html.lastIndexOf(jsTagStart); @@ -209,9 +284,38 @@ public class LzTool extends PanBase { } int startPos = index + jsTagStart.length(); int endPos = html.indexOf(jsTagEnd, startPos); + if (endPos <= startPos) { + return null; + } return html.substring(startPos, endPos).replaceAll("", ""); } + static String extractAcwArg1(String html) { + if (html == null) { + return null; + } + int beginIndex = html.indexOf("arg1='"); + if (beginIndex < 0) { + return null; + } + beginIndex += 6; + int endIndex = html.indexOf("';", beginIndex); + if (endIndex <= beginIndex) { + return null; + } + return html.substring(beginIndex, endIndex); + } + + static boolean isShareCancelledPage(String html) { + return html != null + && ((html.contains("来晚啦") && html.contains("取消分享")) + || (html.contains("class=\"off\"") && html.contains("取消分享"))); + } + + private static boolean hasAcwArg1(String html) { + return html != null && html.contains("var arg1='"); + } + private void getDownURL(String key, Map obj) { if (obj == null) { fail("需要访问密码"); @@ -225,7 +329,7 @@ public class LzTool extends PanBase { }); MultiMap headers = HeaderUtils.parseHeaders(""" Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7 - Accept-Encoding: gzip, deflate, br, zstd + Accept-Encoding: gzip, deflate, br Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6 Cache-Control: no-cache Connection: keep-alive @@ -261,42 +365,57 @@ public class LzTool extends PanBase { headers.remove("Referer"); webClientSession.getAbs(downUrl).putHeaders(headers).send() .onSuccess(res3 -> { - String location = res3.headers().get("Location"); - if (location == null) { - String text = asText(res3); - // 使用cookie 再请求一次 - headers.add("Referer", downUrl); - int beginIndex = text.indexOf("arg1='") + 6; - String arg1 = text.substring(beginIndex, text.indexOf("';", beginIndex)); - String acw_sc__v2 = AcwScV2Generator.acwScV2Simple(arg1); - // 从 downUrl 中动态提取域名 - String downDomain = ".lanrar.com"; - try { - java.net.URL du = new java.net.URL(downUrl); - String h = du.getHost(); - int dot = h.indexOf('.'); - if (dot >= 0) downDomain = h.substring(dot); - } catch (MalformedURLException ignored) {} - // 创建一个 Cookie 并放入 CookieStore - DefaultCookie nettyCookie = new DefaultCookie("acw_sc__v2", acw_sc__v2); - nettyCookie.setDomain(downDomain); - nettyCookie.setPath("/"); - nettyCookie.setSecure(false); - nettyCookie.setHttpOnly(false); - WebClientSession webClientSession2 = WebClientSession.create(clientNoRedirects); - webClientSession2.cookieStore().put(nettyCookie); - webClientSession2.getAbs(downUrl).putHeaders(headers).send() - .onSuccess(res4 -> { - String location0 = res4.headers().get("Location"); - if (location0 == null) { - fail(downUrl + " -> 直链获取失败2, 可能分享已失效"); - } else { - setDateAndComplete(location0); - } - }).onFailure(handleFail(downUrl)); - return; + try { + String location = res3.headers().get("Location"); + if (location == null) { + String text = asText(res3); + if (isShareCancelledPage(text)) { + fail(downUrl + " -> 分享已失效或文件已取消分享"); + return; + } + // 使用cookie 再请求一次 + headers.add("Referer", downUrl); + String arg1 = extractAcwArg1(text); + if (arg1 == null) { + fail(downUrl + " -> 蓝奏云反爬 arg1 Cookie 解析失败,可能分享已失效"); + return; + } + String acw_sc__v2 = AcwScV2Generator.acwScV2Simple(arg1); + // 从 downUrl 中动态提取域名 + String downDomain = ".lanrar.com"; + try { + java.net.URL du = new java.net.URL(downUrl); + String h = du.getHost(); + int dot = h.indexOf('.'); + if (dot >= 0) downDomain = h.substring(dot); + } catch (MalformedURLException ignored) {} + // 创建一个 Cookie 并放入 CookieStore + DefaultCookie nettyCookie = new DefaultCookie("acw_sc__v2", acw_sc__v2); + nettyCookie.setDomain(downDomain); + nettyCookie.setPath("/"); + nettyCookie.setSecure(false); + nettyCookie.setHttpOnly(false); + WebClientSession webClientSession2 = WebClientSession.create(clientNoRedirects); + webClientSession2.cookieStore().put(nettyCookie); + webClientSession2.getAbs(downUrl).putHeaders(headers).send() + .onSuccess(res4 -> { + try { + String location0 = res4.headers().get("Location"); + if (location0 == null) { + fail(downUrl + " -> 直链获取失败2, 可能分享已失效"); + } else { + setDateAndComplete(location0); + } + } catch (Exception e) { + fail("蓝奏云直链二次响应处理异常: {}", e.getMessage()); + } + }).onFailure(handleFail(downUrl)); + return; + } + setDateAndComplete(location); + } catch (Exception e) { + fail("蓝奏云直链响应处理异常: {}", e.getMessage()); } - setDateAndComplete(location); }) .onFailure(handleFail(downUrl)); } catch (Exception e) { @@ -307,10 +426,9 @@ public class LzTool extends PanBase { private void setDateAndComplete(String location0) { // 分享时间 提取url中的时间戳格式:lanzoui.com/abc/abc/yyyy/mm/dd/ - String regex = "(\\d{4}/\\d{1,2}/\\d{1,2})"; - Matcher matcher = Pattern.compile(regex).matcher(location0); + Matcher matcher = URL_DATE_PATTERN.matcher(location0); if (matcher.find()) { - String dateStr = matcher.group().replace("/", "-"); + String dateStr = parseLanzouFileTime(matcher.group()); ((FileInfo)shareLinkInfo.getOtherParam().get("fileInfo")).setCreateTime(dateStr); } promise.complete(location0); @@ -338,26 +456,45 @@ public class LzTool extends PanBase { String pwd = shareLinkInfo.getSharePassword(); webClientSession.getAbs(sUrl).send().onSuccess(res -> { - String html = asText(res); - // 检查是否需要 cookie 验证 - if (html.contains("var arg1='")) { - webClientSession = WebClientSession.create(clientNoRedirects); - setCookie(html, sUrl); - // 重新请求 - webClientSession.getAbs(sUrl).send().onSuccess(res2 -> { - handleFileListParse(asText(res2), pwd, sUrl, promise); - }).onFailure(err -> promise.fail(err)); - return; + try { + String html = asText(res); + // 检查是否需要 cookie 验证 + if (hasAcwArg1(html)) { + webClientSession = WebClientSession.create(clientNoRedirects); + if (!setCookie(html, sUrl)) { + promise.tryFail(baseMsg() + "蓝奏云反爬 arg1 Cookie 解析失败,页面内容异常"); + return; + } + // 重新请求 + webClientSession.getAbs(sUrl).send().onSuccess(res2 -> { + try { + handleFileListParse(asText(res2), pwd, sUrl, promise); + } catch (Exception e) { + promise.tryFail(e); + } + }).onFailure(promise::tryFail); + return; + } + handleFileListParse(html, pwd, sUrl, promise); + } catch (Exception e) { + promise.tryFail(e); } - handleFileListParse(html, pwd, sUrl, promise); - }).onFailure(err -> promise.fail(err)); + }).onFailure(promise::tryFail); return promise.future(); } private void handleFileListParse(String html, String pwd, String sUrl, Promise> promise) { + if (html == null || html.isBlank()) { + promise.tryFail(baseMsg() + "蓝奏云页面响应为空"); + return; + } + if (isShareCancelledPage(html)) { + promise.tryFail(baseMsg() + "分享已失效或文件已取消分享"); + return; + } // 检测是否为文件分享链接 (不含 /s/、/b/ 路径段且不含 b 开头的路径段) if (!sUrl.matches(".*/(s|b)/[^/]+.*") && !sUrl.matches(".*/b[^/]+.*")) { - promise.fail(baseMsg() + "该链接为蓝奏云文件分享,请使用文件解析接口"); + promise.tryFail(baseMsg() + "该链接为蓝奏云文件分享,请使用文件解析接口"); return; } try { @@ -371,28 +508,43 @@ public class LzTool extends PanBase { String url = SHARE_URL_PREFIX + "filemoreajax.php?file=" + data.get("fid"); webClientSession.postAbs(url).putHeaders(headers).sendForm(map).onSuccess(res2 -> { - String resBody = asText(res2); - // 再次检查是否需要 cookie 验证 - if (resBody.contains("var arg1='")) { - setCookie(resBody, url); - // 重新请求 - webClientSession.postAbs(url).putHeaders(headers).sendForm(map).onSuccess(res3 -> { - handleFileListResponse(asText(res3), promise); - }).onFailure(err -> promise.fail(err)); - return; + try { + String resBody = asText(res2); + // 再次检查是否需要 cookie 验证 + if (hasAcwArg1(resBody)) { + if (!setCookie(resBody, url)) { + promise.tryFail(baseMsg() + "蓝奏云反爬 arg1 Cookie 解析失败,页面内容异常"); + return; + } + // 重新请求 + webClientSession.postAbs(url).putHeaders(headers).sendForm(map).onSuccess(res3 -> { + try { + handleFileListResponse(asText(res3), promise); + } catch (Exception e) { + promise.tryFail(e); + } + }).onFailure(promise::tryFail); + return; + } + handleFileListResponse(resBody, promise); + } catch (Exception e) { + promise.tryFail(e); } - handleFileListResponse(resBody, promise); - }).onFailure(err -> promise.fail(err)); + }).onFailure(promise::tryFail); } catch (ScriptException | NoSuchMethodException | RuntimeException e) { - promise.fail(e); + promise.tryFail(e); } } private void handleFileListResponse(String responseBody, Promise> promise) { try { + if (responseBody == null || responseBody.isBlank()) { + promise.tryFail(baseMsg() + "蓝奏云文件列表响应为空"); + return; + } JsonObject fileListJson = new JsonObject(responseBody); if (fileListJson.getInteger("zt") != 1) { - promise.fail(baseMsg() + fileListJson.getString("info")); + promise.tryFail(baseMsg() + fileListJson.getString("info")); return; } List list = new ArrayList<>(); @@ -423,7 +575,7 @@ public class LzTool extends PanBase { String param = CommonUtils.urlBase64Encode(paramJson.encode()); fileInfo.setFileName(fileName) .setFileId(id) - .setCreateTime(fileJson.getString("time")) + .setCreateTime(parseLanzouFileTime(fileJson.getString("time"))) .setFileType(fileJson.getString("icon")) .setSizeStr(fileJson.getString("size")) .setSize(sizeNum) @@ -436,10 +588,46 @@ public class LzTool extends PanBase { }); promise.complete(list); } catch (Exception e) { - promise.fail(e); + promise.tryFail(e); } } + private static String parseLanzouFileTime(String timeText) { + if (timeText == null || timeText.isBlank()) { + return timeText; + } + String normalized = timeText.trim().replaceAll("\\s+", " "); + Matcher matcher = RELATIVE_TIME_PATTERN.matcher(normalized); + if (matcher.matches()) { + int amount = "几".equals(matcher.group(1)) ? 1 : Integer.parseInt(matcher.group(1)); + String unit = matcher.group(2); + LocalDateTime time = LocalDateTime.now(); + if ("小时".equals(unit)) { + time = time.minusHours(amount); + } else { + time = time.minusMinutes(amount); + } + return time.toLocalDate().toString(); + } + matcher = DATE_PATTERN.matcher(normalized); + if (matcher.matches()) { + return LocalDate.of( + Integer.parseInt(matcher.group(1)), + Integer.parseInt(matcher.group(2)), + Integer.parseInt(matcher.group(3)) + ).toString(); + } + matcher = MONTH_DAY_PATTERN.matcher(normalized); + if (matcher.matches()) { + return LocalDate.of( + LocalDate.now().getYear(), + Integer.parseInt(matcher.group(1)), + Integer.parseInt(matcher.group(2)) + ).toString(); + } + return normalized; + } + @Override public Future parseById() { JsonObject paramJson = (JsonObject) shareLinkInfo.getOtherParam().get("paramJson"); @@ -455,13 +643,13 @@ public class LzTool extends PanBase { shareLinkInfo.getOtherParam().put("fileInfo", fileInfo); try { // 提取文件名 - String fileName = CommonUtils.extract(html, Pattern.compile("padding: 56px 0px 20px 0px;\">(.*?)<|filenajax\">(.*?)<")); - String sizeStr = CommonUtils.extract(html, Pattern.compile(">文件大小:(.*?)
|\"n_filesize\">大小:(.*?)")); - String createBy = CommonUtils.extract(html, Pattern.compile(">分享用户:(.*?)|获取(.*?)的文件|\"user-name\">(.*?)
(.*?)|class=\"n_box_des\">(.*?)")); + String fileName = CommonUtils.extract(html, FILE_NAME_PATTERN); + String sizeStr = CommonUtils.extract(html, FILE_SIZE_PATTERN); + String createBy = CommonUtils.extract(html, SHARE_USER_PATTERN); + String description = CommonUtils.extract(html, DESCRIPTION_PATTERN); // String icon = CommonUtils.extract(html, Pattern.compile("class=\"n_file_icon\" src=\"(.*?)\"")); - String fileId = CommonUtils.extract(html, Pattern.compile("\\?f=(.*?)&|fid = (.*?);")); - String createTime = CommonUtils.extract(html, Pattern.compile(">上传时间:(.*?)<")); + String fileId = CommonUtils.extract(html, FILE_ID_PATTERN); + String createTime = CommonUtils.extract(html, CREATE_TIME_PATTERN); try { fileInfo.setFileName(fileName) .setCreateBy(createBy) @@ -469,7 +657,7 @@ public class LzTool extends PanBase { .setDescription(description) .setFileType("file") .setFileId(fileId) - .setCreateTime(createTime); + .setCreateTime(parseLanzouFileTime(createTime)); if (sizeStr != null && !sizeStr.isBlank()) { long bytes = FileSizeConverter.convertToBytes(sizeStr); fileInfo.setSize(bytes).setSizeStr(FileSizeConverter.convertToReadableSize(bytes)); diff --git a/parser/src/main/java/cn/qaiu/parser/impl/MkgsTool.java b/parser/src/main/java/cn/qaiu/parser/impl/MkgsTool.java index cb01165..df8c754 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/MkgsTool.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/MkgsTool.java @@ -21,6 +21,8 @@ public class MkgsTool extends PanBase { public static final String API_URL = "https://m.kugou.com/app/i/getSongInfo.php?cmd=playInfo&hash={hash}"; + private static final Pattern HASH_PATTERN = Pattern.compile("\"hash\"\\s*:\\s*\"([A-F0-9]+)\""); + private static final MultiMap headers = MultiMap.caseInsensitiveMultiMap(); static { // 设置 User-Agent @@ -78,10 +80,7 @@ public class MkgsTool extends PanBase { protected void downUrl(String locationURL) { client.getAbs(locationURL).putHeaders(headers).send().onSuccess(res2->{ String body = res2.bodyAsString(); - // 正则表达式匹配 hash 字段 - String regex = "\"hash\"\s*:\s*\"([A-F0-9]+)\""; - Pattern pattern = Pattern.compile(regex); - Matcher matcher = pattern.matcher(body); + Matcher matcher = HASH_PATTERN.matcher(body); // 查找并输出 hash 字段的值 if (matcher.find()) { diff --git a/parser/src/main/java/cn/qaiu/parser/impl/MkwTool.java b/parser/src/main/java/cn/qaiu/parser/impl/MkwTool.java index 5686703..bb52c6c 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/MkwTool.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/MkwTool.java @@ -19,6 +19,8 @@ public class MkwTool extends PanBase { public static final String API_URL = "https://www.kuwo.cn/api/v1/www/music/playUrl?mid={mid}&type=music&httpsStatus=1&reqId=&plat=web_www&from="; + private static final Pattern COOKIE_PATTERN = Pattern.compile("([A-Za-z0-9_]+)=([A-Za-z0-9]+)"); + public MkwTool(ShareLinkInfo shareLinkInfo) { super(shareLinkInfo); @@ -29,39 +31,41 @@ public class MkwTool extends PanBase { clientSession.getAbs(shareUrl).send().onSuccess(result -> { String cookie = result.headers().get("set-cookie"); - if (cookie != null && !cookie.isEmpty()) { - - String regex = "([A-Za-z0-9_]+)=([A-Za-z0-9]+)"; - Pattern pattern = Pattern.compile(regex); - Matcher matcher = pattern.matcher(cookie); - if (matcher.find()) { - log.debug("cookie key: {}", matcher.group(1)); - log.debug("cookie value: {}", matcher.group(2)); - - var key = matcher.group(1); - var token = matcher.group(2); - String sign = JsExecUtils.getKwSign(token, key); - log.debug("sign: {}", sign); - clientSession.getAbs(UriTemplate.of(API_URL)).setTemplateParam("mid", shareLinkInfo.getShareKey()) - .putHeader("Secret", sign).send().onSuccess(res -> { - JsonObject json = asJson(res); - log.debug(json.encodePrettily()); - try { - if (json.getInteger("code") == 200) { - complete(json.getJsonObject("data").getString("url")); - } else { - fail("链接已失效/需要VIP"); - } - - } catch (Exception e) { - log.error("解析失败", e); - fail("解析失败"); - } - }); - } + if (cookie == null || cookie.isEmpty()) { + fail("未获取到 cookie,无法继续解析"); + return; } - }); + Matcher matcher = COOKIE_PATTERN.matcher(cookie); + if (!matcher.find()) { + fail("cookie 格式不匹配"); + return; + } + + log.debug("cookie key: {}", matcher.group(1)); + log.debug("cookie value: {}", matcher.group(2)); + + var key = matcher.group(1); + var token = matcher.group(2); + String sign = JsExecUtils.getKwSign(token, key); + log.debug("sign: {}", sign); + clientSession.getAbs(UriTemplate.of(API_URL)).setTemplateParam("mid", shareLinkInfo.getShareKey()) + .putHeader("Secret", sign).send().onSuccess(res -> { + JsonObject json = asJson(res); + log.debug(json.encodePrettily()); + try { + if (json.getInteger("code") == 200) { + complete(json.getJsonObject("data").getString("url")); + } else { + fail("链接已失效/需要VIP"); + } + + } catch (Exception e) { + log.error("解析失败", e); + fail("解析失败"); + } + }).onFailure(handleFail("获取下载链接失败")); + }).onFailure(handleFail("请求分享页面失败")); return promise.future(); } diff --git a/parser/src/main/java/cn/qaiu/parser/impl/P360Tool.java b/parser/src/main/java/cn/qaiu/parser/impl/P360Tool.java index 8a9d596..895170b 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/P360Tool.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/P360Tool.java @@ -16,6 +16,9 @@ import java.util.regex.Pattern; * 下载链接需要Referer: https://link.yunpan.com/ */ public class P360Tool extends PanBase { + + private static final Pattern NID_PATTERN = Pattern.compile("\"nid\": \"([^\"]+)\""); + public P360Tool(ShareLinkInfo shareLinkInfo) { super(shareLinkInfo); } @@ -43,9 +46,7 @@ public class P360Tool extends PanBase { clientSession.getAbs(url) .send() .onSuccess(res -> { - // find "nid": "17402043311959599" - Pattern compile = Pattern.compile("\"nid\": \"([^\"]+)\""); - Matcher matcher = compile.matcher(res.bodyAsString()); + Matcher matcher = NID_PATTERN.matcher(res.bodyAsString()); AtomicReference nid = new AtomicReference<>(); if (matcher.find()) { nid.set(matcher.group(1)); @@ -69,7 +70,7 @@ public class P360Tool extends PanBase { clientSession.getAbs(url) .send() .onSuccess(res3 -> { - Matcher matcher1 = compile.matcher(res3.bodyAsString()); + Matcher matcher1 = NID_PATTERN.matcher(res3.bodyAsString()); if (matcher1.find()) { nid.set(matcher1.group(1)); } else { diff --git a/parser/src/main/java/cn/qaiu/parser/impl/PcxTool.java b/parser/src/main/java/cn/qaiu/parser/impl/PcxTool.java index 852456d..0ba4484 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/PcxTool.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/PcxTool.java @@ -14,6 +14,25 @@ import java.util.regex.Pattern; */ public class PcxTool extends PanBase { + private static final Pattern TITLE_PATTERN = + Pattern.compile("([^<]+)"); + private static final Pattern FILENAME_INPUT_PATTERN = + Pattern.compile("标签或文件名input - String fileName = extractByRegex(html, "([^<]+)"); + String fileName = extractByRegex(html, TITLE_PATTERN); if (fileName == null) { - fileName = extractByRegex(html, " * https://api.onedrive.com/v1.0/drives/abfd0a26e47d3458/items/ABFD0A26E47D3458!3729?authkey=!AF3rQNA6Yxr46H8 @@ -45,6 +55,13 @@ public class PodTool extends PanBase { private static final Pattern redirectUrlRegex = Pattern.compile("resid=(?[^!]+)!(?[^&]+).+&redeem=(?.+).*"); + private static final Pattern DOWNLOAD_URL_IN_RESPONSE_PATTERN = + Pattern.compile("\"downloadUrl\":\"(?https?://[^\\s\"]+)"); + private static final Pattern ACTION_URL_PATTERN = + Pattern.compile("'action'.+(?https://.+)'\\)"); + private static final Pattern TOKEN_PATTERN = + Pattern.compile("inputElem\\.value\\s*=\\s*'([^']+)'"); + public PodTool(ShareLinkInfo shareLinkInfo) { super(shareLinkInfo); } @@ -97,7 +114,7 @@ public class PodTool extends PanBase { sendHttpRequest(url, token).onSuccess(body -> { Matcher matcher1 = - Pattern.compile("\"downloadUrl\":\"(?https?://[^\s\"]+)").matcher(body); + DOWNLOAD_URL_IN_RESPONSE_PATTERN.matcher(body); if (matcher1.find()) { // 响应体是 JSON 文本,URL 中的 '&' 被转义为 \u0026,需要反转义 complete(unescapeJsonUnicode(matcher1.group("url"))); @@ -121,11 +138,7 @@ public class PodTool extends PanBase { } private String matcherUrl(String html) { - - // 正则表达式来匹配 URL - String urlRegex = "'action'.+(?https://.+)'\\)"; - Pattern urlPattern = Pattern.compile(urlRegex); - Matcher urlMatcher = urlPattern.matcher(html); + Matcher urlMatcher = ACTION_URL_PATTERN.matcher(html); if (urlMatcher.find()) { String url = urlMatcher.group("url"); @@ -165,10 +178,7 @@ public class PodTool extends PanBase { private String matcherToken(String html) { - // 正则表达式来匹配 inputElem.value 中的 Token - String tokenRegex = "inputElem\\.value\\s*=\\s*'([^']+)'"; - Pattern tokenPattern = Pattern.compile(tokenRegex); - Matcher tokenMatcher = tokenPattern.matcher(html); + Matcher tokenMatcher = TOKEN_PATTERN.matcher(html); if (tokenMatcher.find()) { String token = tokenMatcher.group(1); @@ -180,11 +190,8 @@ public class PodTool extends PanBase { public Future sendHttpRequest2(String token, String redeem) { Promise promise = Promise.promise(); - // 构造 HttpClient - HttpClient client = HttpClient.newHttpClient(); // 构造请求的 URI 和头部信息 - // https://onedrive.live.com/redir?cid=abfd0a26e47d3458&resid=ABFD0A26E47D3458!4465&ithint=file%2cxlsx&e=Ao2uSU&migratedtospo=true&redeem=aHR0cHM6Ly8xZHJ2Lm1zL3gvYy9hYmZkMGEyNmU0N2QzNDU4L0VWZzBmZVFtQ3YwZ2dLdHhFUUFBQUFBQlRQRWVDMTZfZk1EYk5FTjhEdTRta1E_ZT1BbzJ1U1U String url = ("https://my.microsoftpersonalcontent.com/_api/v2.0/shares/u!%s/driveItem?$select=content" + ".downloadUrl").formatted(redeem); String authorizationHeader = "Badger " + token; @@ -192,15 +199,20 @@ public class PodTool extends PanBase { // 构建请求 HttpRequest request = HttpRequest.newBuilder() .uri(URI.create(url)) + .timeout(REQUEST_TIMEOUT) .header("Authorization", authorizationHeader) .build(); - // 发送请求并处理响应 - client.sendAsync(request, HttpResponse.BodyHandlers.ofString()) + // 发送请求并处理响应(使用共享的 HttpClient) + SHARED_HTTP_CLIENT.sendAsync(request, HttpResponse.BodyHandlers.ofByteArray()) .thenApply(response -> { log.debug("Response Status Code: {}", response.statusCode()); - log.debug("Response Body: {}", response.body()); - promise.complete(response.body()); + promise.complete(toLimitedString(response.body())); + return null; + }) + .exceptionally(e -> { + log.error("sendHttpRequest2 请求失败: {}", e.getMessage()); + promise.fail(e); return null; }); @@ -208,18 +220,13 @@ public class PodTool extends PanBase { } public Future sendHttpRequest(String url, String token) { - // 创建一个 WorkerExecutor 用于异步执行阻塞的 HTTP 请求 - WorkerExecutor executor = WebClientVertxInit.get().createSharedWorkerExecutor("http-client-worker"); - Promise promise = Promise.promise(); - executor.executeBlocking(() -> { - HttpClient client = HttpClient.newHttpClient(); - HttpRequest request = null; - + getWorkerExecutor().executeBlocking(() -> { try { // 构造请求 - request = HttpRequest.newBuilder() + HttpRequest request = HttpRequest.newBuilder() .uri(new URI(url)) + .timeout(REQUEST_TIMEOUT) .header("accept", "text/html,application/xhtml+xml,application/xml;q=0.9," + "image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;" + "v=b3;q=0.7") @@ -244,17 +251,49 @@ public class PodTool extends PanBase { .POST(HttpRequest.BodyPublishers.ofString("badger_token=" + token)) .build(); - // 发起请求并获取响应 - HttpResponse response = client.send(request, HttpResponse.BodyHandlers.ofString()); + // 发起请求并获取响应(使用共享的 HttpClient) + HttpResponse response = SHARED_HTTP_CLIENT.send(request, HttpResponse.BodyHandlers.ofByteArray()); // 返回响应体 - promise.complete(response.body()); + promise.complete(toLimitedString(response.body())); return null; } catch (URISyntaxException | IOException | InterruptedException e) { + if (e instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } throw new RuntimeException(e); } - }); + }).onFailure(promise::fail); return promise.future(); } -} \ No newline at end of file + + private static String toLimitedString(byte[] body) { + if (body.length > MAX_RESPONSE_BODY_BYTES) { + throw new IllegalArgumentException("OneDrive响应体过大: " + body.length + " bytes"); + } + return new String(body, java.nio.charset.StandardCharsets.UTF_8); + } + + private static WorkerExecutor getWorkerExecutor() { + synchronized (PodTool.class) { + if (workerExecutorShutdown) { + throw new IllegalStateException("OneDrive WorkerExecutor 已关闭"); + } + if (SHARED_WORKER_EXECUTOR == null) { + SHARED_WORKER_EXECUTOR = WebClientVertxInit.get().createSharedWorkerExecutor("http-client-worker", 8); + } + return SHARED_WORKER_EXECUTOR; + } + } + + public static void shutdownWorkerExecutor() { + synchronized (PodTool.class) { + workerExecutorShutdown = true; + if (SHARED_WORKER_EXECUTOR != null) { + SHARED_WORKER_EXECUTOR.close(); + SHARED_WORKER_EXECUTOR = null; + } + } + } +} diff --git a/parser/src/main/java/cn/qaiu/parser/impl/PvyyTool.java b/parser/src/main/java/cn/qaiu/parser/impl/PvyyTool.java index 1665ba9..1a47dec 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/PvyyTool.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/PvyyTool.java @@ -1,6 +1,5 @@ package cn.qaiu.parser.impl; -import cn.qaiu.WebClientVertxInit; import cn.qaiu.entity.FileInfo; import cn.qaiu.entity.ShareLinkInfo; import cn.qaiu.parser.PanBase; @@ -11,7 +10,6 @@ import io.vertx.core.MultiMap; import io.vertx.core.Promise; import io.vertx.core.json.JsonObject; import io.vertx.core.json.pointer.JsonPointer; -import io.vertx.ext.web.client.WebClient; import io.vertx.uritemplate.UriTemplate; import java.util.List; @@ -53,9 +51,8 @@ public class PvyyTool extends PanBase { @Override public Future parse() { - // 请求downcode - WebClient.create(WebClientVertxInit.get()) - .getAbs(api + shareLinkInfo.getShareKey()) + // 请求downcode - 使用父类的共享 WebClient 而非创建新实例 + client.getAbs(api + shareLinkInfo.getShareKey()) .send() .onSuccess(res -> { if (res.statusCode() == 200) { diff --git a/parser/src/main/java/cn/qaiu/parser/impl/QQscTool.java b/parser/src/main/java/cn/qaiu/parser/impl/QQscTool.java index a92534d..ef57912 100644 --- a/parser/src/main/java/cn/qaiu/parser/impl/QQscTool.java +++ b/parser/src/main/java/cn/qaiu/parser/impl/QQscTool.java @@ -63,6 +63,11 @@ public class QQscTool extends PanBase { x-oidb: {"uint32_command":"0x93d4", "uint32_service_type":"1"} """); + private static final Pattern FILESET_ID_PATTERN = Pattern.compile( + "fileset_id[^a-f0-9]*([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})"); + + private static final Pattern TITLE_PATTERN = Pattern.compile("(.*?)"); + public QQscTool(ShareLinkInfo shareLinkInfo) { super(shareLinkInfo); } @@ -247,7 +252,9 @@ public class QQscTool extends PanBase { .put("sort_order", 0))))) .put("support_folder_status", true); - MultiMap headers = GET_FILE_LIST_HEADERS.set("Referer", shareLinkInfo.getShareUrl()); + // 创建局部副本,避免修改静态 MultiMap 导致并发污染 + MultiMap headers = MultiMap.caseInsensitiveMultiMap().addAll(GET_FILE_LIST_HEADERS) + .set("Referer", shareLinkInfo.getShareUrl()); client.postAbs(GET_FILE_LIST_API) .putHeaders(headers) @@ -283,9 +290,7 @@ public class QQscTool extends PanBase { String extractFilesetId(String html) { // Nuxt __NUXT_DATA__ 中 fileset_id 出现在缓存 key 的嵌套 JSON 中 // 直接匹配 fileset_id 后面最近的 UUID(跳过转义引号、冒号等非hex字符) - Pattern pattern = Pattern.compile( - "fileset_id[^a-f0-9]*([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})"); - Matcher matcher = pattern.matcher(html); + Matcher matcher = FILESET_ID_PATTERN.matcher(html); if (matcher.find()) { return matcher.group(1); } @@ -326,8 +331,7 @@ public class QQscTool extends PanBase { } public static String extractFileNameFromTitle(String content) { - Pattern pattern = Pattern.compile("(.*?)"); - Matcher matcher = pattern.matcher(content); + Matcher matcher = TITLE_PATTERN.matcher(content); if (matcher.find()) { String fullTitle = matcher.group(1); int sepIndex = fullTitle.indexOf("|");