背景
同步企业微信用户与部门数据时,新版本接口不再提供用户名与部门名称信息,需要在前端通过 ww-open-data 组件渲染,参见官方文档:https://developer.work.weixin.qq.com/document/path/91958#2.2%20%E4%BD%BF%E7%94%A8%E6%96%B9%E6%B3%95
<ww-open-data type="userName" openid="{{openid}}"></ww-open-data>
渲染之后页面结构如下
由于 shadow-root 为 closed 模式,查阅资料可知这种模式下无法通过操作 DOM 元素获取其内容,尝试获取的结果如下图:
解决方法
解决方法:通过自定义 Chrome 插件,强制把 shadow-root 的 closed 模式更改为 open,操作步骤如下:
新建以下文件
injected.js
// Page-context patch: every shadow root created after this script runs is
// forced into "open" mode, so element.shadowRoot is readable from page scripts.
(function () {
  // If the script is injected twice, _attachShadow would end up pointing at the
  // patched function and every call would recurse forever; patch only once.
  if (Element.prototype._attachShadow) {
    return;
  }
  // Keep a reference to the native implementation.
  Element.prototype._attachShadow = Element.prototype.attachShadow;
  Element.prototype.attachShadow = function (init) {
    console.log('attachShadow');
    // Preserve whatever options the caller passed (e.g. delegatesFocus) and
    // override only the mode.
    return this._attachShadow(Object.assign({}, init, { mode: "open" }));
  };
})();
manifest.json
{
  "name": "SeleniumTesting",
  "description": "将网页上的shadow-root(closed)重载成shadow-root(open)",
  "version": "1.0",
  "author": "Author",
  "manifest_version": 2,
  "permissions": ["<all_urls>", "https://www.baidu.com/"],
  "content_scripts": [{
    "matches": ["https://你需要让它生效的网站.com/"],
    "run_at": "document_start",
    "all_frames": true,
    "js": ["shadowInject.js"]
  }],
  "web_accessible_resources": ["injected.js"]
}
shadowInject.js
// Content script: adds a <script> tag pointing at the extension's injected.js
// so that file is loaded by the page itself.
const injectedScript = document.createElement('script');
// chrome.extension.getURL is deprecated; chrome.runtime.getURL is the
// supported equivalent and resolves the same extension-relative path.
injectedScript.src = chrome.runtime.getURL('injected.js');
// document.head may not exist yet when running at document_start, so fall
// back to the root element.
(document.head || document.documentElement).appendChild(injectedScript);
全程只需要把 manifest.json 中的第二个网址(content_scripts 的 matches 项)修改为你需要让它生效的网站域名即可。
查看页面元素发现shadow-root为open
打开插件之后重新打开页面,js代码即可获取到姓名
编码
接下来开始编码。之前只爬过普通的 html 页面,而此项目使用 vue,需要让代码运行在浏览器中,爬取渲染之后的内容。要爬取的内容由前端根据企业微信开发文档传入微信用户 id、部门 id 等信息,企业微信经过一系列鉴权后把内容动态加入到界面中。页面为后台管理界面,爬虫实现了点击等操作。由于点击跳转之后立即读取页面,获取到的仍是前一页的内容,因此每次点击后先等待几秒,再执行点击下一页或读取页面等操作。
代码如下:
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.reflect.TypeToken;
import com.ruiyun.jvppeteer.core.Constant;
import com.ruiyun.jvppeteer.core.Puppeteer;
import com.ruiyun.jvppeteer.core.browser.Browser;
import com.ruiyun.jvppeteer.core.browser.BrowserFetcher;
import com.ruiyun.jvppeteer.core.page.Page;
import com.ruiyun.jvppeteer.options.LaunchOptions;
import com.ruiyun.jvppeteer.options.LaunchOptionsBuilder;
import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
/**
 * Scrapes the user and department names rendered inside {@code <ww-open-data>}
 * elements of a paginated admin page.
 *
 * <p>The browser is launched with a local Chrome extension. The page script reads
 * {@code shadowRoot.textContent}, so it only works when the elements' shadow roots
 * are readable (open) in the page.
 */
public class PageEvaluteExample {

    /** Page function returning the {@code disabled} flag of the first {@code .btn-next} element. */
    public static final String FUNCTION_BTN_NEXT = "() => {\n " +
            "return document.getElementsByClassName('btn-next')[0].disabled;\n }";

    /**
     * Page function that walks every {@code ww-open-data} element and returns
     * {@code {users: [...], depts: [...]}}, each entry holding the rendered name and the openid.
     *
     * <p>In Java the script cannot be written inline as in Node.js, so it is kept as a string;
     * edit it in a JS editor and paste it back here.
     */
    private static final String FUNCTION_COLLECT_OPEN_DATA = "() => {\n " +
            "let retVal = '';\n " +
            "const users = [];" +
            "const depts = [];" +
            "let arr = document.getElementsByTagName('ww-open-data');\n " +
            "for (i = 0; i < arr.length; i++) {\n" +
            " console.log(arr[i].attributes.type.textContent);\n" +
            " if (arr[i].attributes.type.textContent==='userName'){\n " +
            " users.push({name:arr[i].shadowRoot.textContent,openid:arr[i].attributes.openid.textContent});" +
            " }" +
            " if (arr[i].attributes.type.textContent==='departmentName'){\n " +
            " depts.push({name:arr[i].shadowRoot.textContent,openid:arr[i].attributes.openid.textContent});" +
            " }" +
            "}" +
            "return {'users':users,'depts':depts};\n }";

    /** Pause used after every navigation or click so the page can finish rendering. */
    private static final long SETTLE_MILLIS = 5000L;

    /**
     * Single pool shared by every page evaluation. It is shut down at the end of
     * {@link #main(String[])}: its core threads are non-daemon and never time out by default,
     * so an un-shut-down pool would keep the JVM alive after the scrape has finished.
     */
    static ThreadPoolExecutor executor =
            new ThreadPoolExecutor(4, 4, 40, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<>());

    /**
     * Logs in, opens the employee management page, scrapes every page of the list and prints
     * the collected users and departments (one nested list per scraped page).
     *
     * @param args unused
     * @throws Exception if the browser cannot be launched, a page evaluation fails,
     *                   or the thread is interrupted while waiting
     */
    public static void main(String[] args) throws Exception {
        BrowserFetcher.downloadIfNotExist(null);

        // Collect every launch argument BEFORE building the options, so the result does not
        // depend on the builder keeping a live reference to this list.
        List<String> launchArgs = new ArrayList<>();
        launchArgs.add("--no-sandbox");
        launchArgs.add("--disable-setuid-sandbox");
        // Local path of the unpacked extension.
        String pathToExtension = "D:\\google-chrome\\plugins\\pachong";
        launchArgs.add("--disable-extensions-except=" + pathToExtension);
        launchArgs.add("--load-extension=" + pathToExtension);
        LaunchOptions options = new LaunchOptionsBuilder().withArgs(launchArgs).withHeadless(false).build();

        Browser browser = Puppeteer.launch(options);
        try {
            Page page = browser.newPage();
            page.goTo("https://example.com/#/login");
            // Entering the account and password is omitted here; the click submits the login form.
            page.click(".el-button--primary", false);
            Thread.sleep(SETTLE_MILLIS);

            // NOTE(review): the original issues the same navigation three times in a row;
            // kept as-is because it may be a workaround for the SPA router - confirm.
            for (int attempt = 0; attempt < 3; attempt++) {
                page.goTo("https://example.com/#/employeeManagement");
            }
            Thread.sleep(SETTLE_MILLIS);

            List<Future<String>> futureList = new ArrayList<>();

            // Wait for each scrape to finish before touching the page again; otherwise the
            // click on "next" can race with a scrape that has not run yet.
            Future<String> firstPage = getFutures(page);
            futureList.add(firstPage);
            firstPage.get();
            Thread.sleep(SETTLE_MILLIS);

            while (!isLast(page)) {
                Thread.sleep(SETTLE_MILLIS);
                page.click(".btn-next", true);
                // Reading right after the click still yields the previous page, so wait first.
                Thread.sleep(SETTLE_MILLIS);
                Future<String> nextPage = getFutures(page);
                futureList.add(nextPage);
                nextPage.get();
            }

            Gson gson = new Gson();
            Type pageType = new TypeToken<Map<String, List<Object>>>() {}.getType();
            List<Object> users = new ArrayList<>();
            List<Object> depts = new ArrayList<>();
            for (Future<String> pageResult : futureList) {
                Map<String, List<Object>> map = gson.fromJson(pageResult.get(), pageType);
                // One entry per scraped page, each entry being that page's list.
                users.add(map.get("users"));
                depts.add(map.get("depts"));
            }
            System.out.println(users);
            System.out.println(depts);
        } finally {
            try {
                browser.close();
            } finally {
                executor.shutdown();
            }
        }
    }

    /**
     * Tells whether the pagination's "next" button is disabled, i.e. the last page is shown.
     *
     * @param page the page to inspect
     * @return {@code true} when the first {@code .btn-next} element reports {@code disabled}
     * @throws ExecutionException   if the page evaluation fails
     * @throws InterruptedException if interrupted while waiting for the evaluation
     */
    private static boolean isLast(Page page) throws ExecutionException, InterruptedException {
        Object disabled = evaluate(page, FUNCTION_BTN_NEXT).get(0).get();
        return Boolean.parseBoolean(disabled.toString());
    }

    /**
     * Submits a scrape of the page's {@code ww-open-data} elements to the shared pool.
     *
     * @param page the page to scrape
     * @return a future holding the scrape result serialized as a JSON string
     */
    private static Future<String> getFutures(Page page) {
        return executor.submit(() -> {
            Object result = page.evaluate(FUNCTION_COLLECT_OPEN_DATA);
            String json = Constant.OBJECTMAPPER.writeValueAsString(result);
            System.out.println("result:" + json);
            return json;
        });
    }

    /**
     * Submits an arbitrary page function to the shared pool.
     *
     * @param page         the page to evaluate the function in
     * @param pageFunction JavaScript source of the function to run
     * @return a single-element list holding the future of the evaluation result
     */
    private static List<Future<Object>> evaluate(Page page, String pageFunction) {
        List<Future<Object>> futureList = new ArrayList<>();
        Future<Object> pending = executor.submit(() -> {
            Object result = page.evaluate(pageFunction);
            System.out.println("result:" + Constant.OBJECTMAPPER.writeValueAsString(result));
            return result;
        });
        futureList.add(pending);
        return futureList;
    }
}
结果展示
打印结果如下
大功告成~收工!!!
评论区