#!/usr/bin/env python3
"""Check that the local resources referenced by a website's HTML files exist.

The script scans every ``*.html`` file located directly in the website
directory (by default the directory containing this script), extracts the
image, CSS and JavaScript links written with double-quoted ``src``/``href``
attributes, and reports every link whose target file is missing on disk.
"""

import os
import re
from pathlib import Path
from typing import List, Optional, Tuple, Union
from urllib.parse import unquote

# Links starting with one of these prefixes point to another host and are
# therefore not checked against the local file system.
_EXTERNAL_PREFIXES = ('http://', 'https://', '//')

# Image links (``src`` attributes only).  The capture group stops before any
# ``?query`` or ``#fragment`` suffix so that cache-busted URLs such as
# ``logo.png?v=2`` are recognised and resolved to ``logo.png``.
_IMG_PATTERN = re.compile(
    r'src="([^"?#]*\.(?:jpg|jpeg|png|gif|svg|ico))(?:[?#][^"]*)?"',
    re.IGNORECASE,
)

# CSS and JS links (``src`` or ``href`` attributes), same suffix handling.
_RESOURCE_PATTERN = re.compile(
    r'(?:src|href)="([^"?#]*\.(?:css|js))(?:[?#][^"]*)?"',
    re.IGNORECASE,
)


def _is_local(link: str) -> bool:
    """Return True when *link* does not start with an external URL prefix."""
    # URL schemes are case-insensitive, so "HTTPS://..." is external too.
    return not link.lower().startswith(_EXTERNAL_PREFIXES)


def _resolve_resource(website_dir: Path, link: str) -> Path:
    """Map a link as written in the HTML to the file it refers to on disk."""
    # Percent-decoding turns e.g. "my%20logo.png" into the on-disk file name.
    # Leading slashes are stripped because joining an absolute path onto
    # website_dir would silently discard website_dir (pathlib join rule).
    # NOTE(review): this assumes website_dir is the site root that
    # root-relative links ("/css/a.css") refer to - confirm for your layout.
    return website_dir / unquote(link).lstrip('/')


def extract_resource_links(file_path: Union[str, os.PathLike]) -> List[str]:
    """Extract the local resource links from an HTML file.

    Args:
        file_path: Path of the HTML file; it is read as UTF-8 text.

    Returns:
        The image links followed by the CSS/JS links, each group in document
        order.  Links are returned as written in the HTML, minus any
        ``?query`` / ``#fragment`` suffix.  Links starting with ``http://``,
        ``https://`` or ``//`` are left out.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    img_links = _IMG_PATTERN.findall(content)
    resource_links = _RESOURCE_PATTERN.findall(content)

    # Filter out external links while keeping images ahead of CSS/JS.
    return [link for link in img_links + resource_links if _is_local(link)]


def check_resources(
    website_dir: Optional[Union[str, os.PathLike]] = None,
) -> List[Tuple[str, str, str]]:
    """Check whether every resource referenced by the HTML files exists.

    Args:
        website_dir: Directory holding the ``*.html`` files and against which
            links are resolved.  Defaults to the directory containing this
            script.

    Returns:
        One ``(html_file_name, link, status)`` tuple per missing resource.
        A summary is also printed to standard output.
    """
    root = Path(__file__).parent if website_dir is None else Path(website_dir)
    # glob() order is unspecified; sorting keeps the report deterministic.
    html_files = sorted(root.glob('*.html'))

    missing_resources = []
    existing_resources = []

    for html_file in html_files:
        print(f"检查文件: {html_file.name}")
        for resource in extract_resource_links(html_file):
            if _resolve_resource(root, resource).exists():
                existing_resources.append((html_file.name, resource, "存在"))
            else:
                missing_resources.append((html_file.name, resource, "缺失"))

    # Print the results.
    print("\n=== 资源检查结果 ===")

    if missing_resources:
        print("\n❌ 缺失的资源文件:")
        for html_name, resource, status in missing_resources:
            print(f" {html_name} -> {resource} ({status})")
    else:
        print("\n✅ 所有资源文件都存在")

    if existing_resources:
        print(f"\n✅ 存在的资源文件: {len(existing_resources)} 个")

    return missing_resources


if __name__ == "__main__":
    check_resources()