41

SpringBoot+Vue3 项目实战,打造企业级在线办公系统

 3 years ago
source link: https://segmentfault.com/a/1190000040586313
Go to the source link to view the original article, where you will find the images, any later updates, and better typesetting. If the link is broken, click the button below to view the snapshot taken at the time.
neoserver,ios ssh client

download:SpringBoot+Vue3 项目实战,打造企业级在线办公系统

package com.zzger.model;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;

import com.zzger.module.queue.UrlQueue;
import com.zzger.util.HttpUtils;
import com.zzger.util.RegexUtils;

public class WebSite {

/**
 * Root URL of the site; set once by the constructor and fetched first by guangDu().
 */
private String url;
   
/**
 * Queue of page URLs that still need to be crawled.
 */
private UrlQueue<String> urls = new UrlQueue<>();
   
/**
 * URLs of pages that have already been crawled (synchronized list).
 * NOTE(review): nothing in the visible code reads or writes this list.
 */
private List<String> exitUrls = Collections.synchronizedList(new ArrayList<>());
   
/** Count used to size the completion latch below. */
private static final int TOTAL_THREADS = 12;  
   
/** Latch with a count of 1, meant as a common start gate for worker threads. */
private final CountDownLatch mStartSignal = new CountDownLatch(1);  
   
/** Latch with a count of TOTAL_THREADS, meant to signal that workers have finished. */
private final CountDownLatch mDoneSignal = new CountDownLatch(TOTAL_THREADS);   
   
/**
 * Creates a crawler for one site and seeds the crawl queue with its home page.
 *
 * @param url home-page URL of the site
 */
public WebSite(String url) {
    // The home page is the first entry waiting to be crawled.
    urls.offer(url);
    this.url = url;
}
   
/**
 * Starts a background thread that downloads the home page and hands its HTML
 * to {@link #paxing(String)}, which follows the pagination links.
 */
public void guangDu() {
    Thread pagination = new Thread(() -> paxing(HttpUtils.httpGet(url)));
    pagination.start();
}
   
public void paxing(String html){
    if(html.lastIndexOf("下一页</a></li></ul></div>")<0)    return ;
    String strList = html.substring(html.indexOf("<li class=\\"next-page\\">"), 
            html.lastIndexOf("下一页</a></li></ul></div>"));
    String url = RegexUtils.RegexString("<a href=\\"(.+?)\\"", strList);
    if(url.equals("Nothing")) return ;
    urls.put(url);//把url存储到队列中
    paxing(HttpUtils.httpGet(url));
}
   
/**
 * Repeatedly takes a page URL from the crawl queue, splits the page into
 * sections and processes each section on its own thread, printing the title
 * of the model each section yields.
 *
 * <p>Each batch gets its own pair of latches: a start gate released once all
 * workers of the batch have been started, and a completion latch sized to the
 * actual number of sections. The next URL is taken only after the whole batch
 * has finished. The method loops instead of recursing and returns only when
 * the calling thread is interrupted.
 */
public void dxcPx() {
    while (!Thread.currentThread().isInterrupted()) {
        Page<DuanZi> page = new Gxpage(urls.take());
        List<Section<DuanZi>> list = page.ybhqSection().getSections();

        // Fresh latches per batch: a CountDownLatch cannot be reset, and the
        // number of sections differs from page to page.
        final CountDownLatch startSignal = new CountDownLatch(1);
        final CountDownLatch doneSignal = new CountDownLatch(list.size());

        for (final Section<DuanZi> section : list) {
            new Thread(() -> {
                try {
                    // All workers of the batch wait for the same start command.
                    startSignal.await();
                    DuanZi duanzi = section.select().getModel();
                    System.out.println(duanzi.getTitle());
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                } finally {
                    // Always report completion so the coordinator cannot hang
                    // when a worker fails.
                    doneSignal.countDown();
                }
            }).start();
        }

        // Every worker has been started; release them together.
        startSignal.countDown();

        try {
            doneSignal.await(); // wait for all workers of this batch
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return;
        }
    }
}
/**
 * Entry point: starts pagination discovery for the site and launches ten
 * threads that each run the section-crawling loop.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final WebSite web = new WebSite("http://duanziwang.com");
    web.guangDu();

    int started = 0;
    while (started < 10) {
        new Thread(web::dxcPx).start();
        started++;
    }
}

About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK