如何让主线程最后完成

伊霍尔巴拉拜

我有一个在 Executor.newCachedThreadPool() 的帮助下工作的解析器,并面临这样一个事实,即写入 JSON 文件的记录的主线程在子线程之前执行。结果,我们有一个空文件......我对多线程的主题了解得很差,无法理解错误。我尝试在主线程上使用Join()方法,但最终程序在接近这部分时就挂了

主程序

import model.Product;

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;

public class Main {

    public static void main(String[] args) throws InterruptedException {

        String rootUrl = "example.com";
        System.out.println("Started parsing: " + rootUrl);
        long m = System.currentTimeMillis();

        HtmlParser htmlParser = new HtmlParser();
        List<Product> productList = new CopyOnWriteArrayList<>();
        htmlParser.parse(rootUrl, productList);

        Printer.printToJson(productList);

        System.out.println("Finish: completed in " + ((double) System.currentTimeMillis() - m) / 1000 + " seconds");
    }
}

HtmlParser.java

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import ua.bala.model.Product;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;

public class HtmlParser {

    private static AtomicInteger httpRequestsCounter = new AtomicInteger(0);

    public static AtomicInteger getHttpRequestsCounter() {
        return httpRequestsCounter;
    }

    public void parse(String url, List<Product> productList) {
        try {
            Document page = getPage(url);
            parsePage(page, productList);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    private static Document getPage(String url) throws IOException {
        Document document = Jsoup.connect(url).get();
        httpRequestsCounter.getAndIncrement();
        return document;
    }

    private void parsePage(Document page, List<Product> productList) {
        Elements productElements = page.select("a.dgBQdu");

        ExecutorService service = Executors.newCachedThreadPool();
        for (Element element: productElements){
            service.execute(() -> {

                Long articleID = Long.parseLong(element.attr("id"));
                String name = "NAME";
                String brand = "BRAND";
                BigDecimal price = new BigDecimal(BigInteger.ZERO);
                Set<String> colors = new HashSet<>();
                String url = "https://www.aboutyou.de" + element.attr("href");
                Document innerPage;

                try {
                    innerPage = getPage(url);
                    Element innerElement = innerPage.selectFirst("[data-test-id='BuyBox']");
                    name = innerElement.selectFirst("div.dZjUXd").text();
                    brand = innerElement.selectFirst("[data-test-id='BrandLogo']").attr("alt");
                    colors = new HashSet<>(innerElement.select("span.jlvxcb-1").eachText());
                    String priceStr = innerElement.selectFirst("div.dWWxvw > span").text().replace("ab ","").replace(" EUR","").replace(",", ".");
                    price = new BigDecimal(priceStr);
                } catch (IOException e) {
                    e.printStackTrace();
                }
                Product product = new Product(articleID, name, brand, colors, price, url);
                addProduct(product, productList);
            });
        }
        service.shutdown();
    }

    private synchronized void addProduct(Product product, List<Product> productList){
        System.out.println("Product " + product.getID() + " parsed");
        System.out.print(product);
        productList.add(product);
        System.out.printf("Product %d added to list\n%n", product.getID());
    }
}

打印机.java

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import model.Product;

import java.io.*;
import java.util.Comparator;
import java.util.List;

public class Printer {

    private static final String path = "";
    private static final String fileName = "productsOutput";

    public static void printToJson(List<Product> products){

        products.sort(Comparator.comparing(Product::getID));

        System.out.println("Product list start printing to JSON");
        try (final Writer writer = new FileWriter(path + fileName + ".json")) {
            Gson gson = new GsonBuilder().create();
            gson.toJson(products, writer);
            System.out.println("Product list printed to JSON");
            System.out.printf("Amount of triggered HTTP requests: %s%nAmount of extracted products: %s%n",
                                 HtmlParser.getHttpRequestsCounter(), products.size());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

打印机.java

package model;

import lombok.*;

import java.math.BigDecimal;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;

@NoArgsConstructor
@Getter
@Setter
public class Product {

    private static AtomicLong productsCounter = new AtomicLong(1);

    private Long ID;
    private Long articleID;
    private String name;
    private String brand;
    private BigDecimal price;
    private Set<String> colors;
    private String url;

    {
        ID = productsCounter.getAndIncrement();
    }

    public Product(Long articleID, String name, String brand, Set<String> colors, BigDecimal price, String url) {
        this.articleID = articleID;
        this.name = name;
        this.brand = brand;
        this.price = price;
        this.colors = colors;
        this.url = url;
    }

    public static AtomicLong getProductsCounter() {
        return productsCounter;
    }

    @Override
    public String toString() {
        return String.format("%d\t%d\t%s\t%s\t%s\t%s\t%s\n", ID, articleID, name, brand, price, colors, url);
    }
}
宇宙电子

有几种方法可以克服这个问题。使用可观察对象,或阻塞主线程或使用接口而不阻塞主线程。对我来说,界面将是一个不错的选择。如果您熟悉 java 接口,则可以实现一个接口以打印最近解析的产品。这是一步一步的方法:

接口类:

public interface ProductsListener {
    void onProductsReady(List<Product> products);
}

MainImpl 类(不是 Main 类本身):

public class MainImpl implements ProductListener {
    // When product list loading is done this func will be called
    void onProductsRead(List<Product> products) {
        Printer.printToJson(productList);
    }
}

在主类中:

public class Main {
    public static void main(String[] args) throws InterruptedException {
        MainImpl listener = new MainImpl();
        htmlParser.setProductListener(listener);
        // Rest of the code...
    }
}

在 HtmlParser 类中:

public class HtmlParser {
    private MainImpl productListener;
    //...

    public void setProductListener(MainImpl listener) {
        // Alternatively you can do it in a constructor
        productListener = listener;
    }
    //...

    private void parsePage(Document page, List<Product> productList) {
        Elements productElements = page.select("a.dgBQdu");
        int parseCount = 0;

        ExecutorService service = Executors.newCachedThreadPool();
        for (Element element: productElements){
            service.execute(() -> {

                Long articleID = Long.parseLong(element.attr("id"));
                String name = "NAME";
                String brand = "BRAND";
                BigDecimal price = new BigDecimal(BigInteger.ZERO);
                Set<String> colors = new HashSet<>();
                String url = "https://www.aboutyou.de" + element.attr("href");
                Document innerPage;

                try {
                    innerPage = getPage(url);
                    Element innerElement = innerPage.selectFirst("[data-test-id='BuyBox']");
                    name = innerElement.selectFirst("div.dZjUXd").text();
                    brand = innerElement.selectFirst("[data-test-id='BrandLogo']").attr("alt");
                    colors = new HashSet<>(innerElement.select("span.jlvxcb-1").eachText());
                    String priceStr = innerElement.selectFirst("div.dWWxvw > span").text().replace("ab ","").replace(" EUR","").replace(",", ".");
                    price = new BigDecimal(priceStr);
                } catch (IOException e) {
                    e.printStackTrace();
                }
                Product product = new Product(articleID, name, brand, colors, price, url);
                addProduct(product, productList);
                parseCount++; // Count each element that has been parsed
                // Check if all elements have been parsed
                if(parseCount >= productElements.size()) {
                    // All products are done, notify the listener class
                    productListener.onProductsReady(productList);
                }
            });
    }
}

未测试,但接口逻辑必须工作。

本文收集自互联网,转载请注明来源。

如有侵权,请联系 [email protected] 删除。

编辑于
0

我来说两句

0 条评论
登录 后参与评论

相关文章