Is there any way to add the description found behind each name link, using a CSS selector?
Here is my new code. With this code I get output like this:
{ "names": "Augustine T.K. James (6360)", "dates": "29/11/2021" } { "description": "Augustine T.K. James (6360) as the sole proprietor of Messrs. James & Co (“the Firm”) had been punished and imposed a suspension of 3 months effective 1/1/2022, fine of RM5000-00, costs of RM4000-00 and ordered to attend a course conducted by the Institute on Audit Quality Enhancement Program by the Disciplinary Committee of the Institute on 29/11/2021 after the Firm had been rated as ‘unsatisfactory’ as indicated in the Monitoring Review Report dated 10/6/2019 which detailed the weaknesses in the audit work performed." }
I want them to be inside a single pair of curly braces, like { "name": "XYZ", "date": "29/11/2021", "description": "XYZ xyz xyz" }
package com.company;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import net.minidev.json.JSONObject;
import org.apache.juneau.serializer.SerializeException;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.apache.juneau.json.JsonSerializer;
import java.util.ArrayList;
import java.util.List;
/**
 * Scrapes the MIA disciplinary-decisions listing with Selenium and prints, for
 * each entry, the serialized name/date record followed by the serialized
 * description record.
 *
 * <p>Two browser sessions are used, one after the other: the first opens every
 * detail link to collect the description text, the second reads the name and
 * date columns straight from the listing pages.
 */
public class Main {

    /** Listing page that both scraping passes start from. */
    private static final String DECISIONS_URL =
            "https://www.mia.org.my/v2/surveillance/disciplinary/decisions.aspx";

    /** Number of listing pages that are visited. */
    private static final int PAGE_COUNT = 8;

    /** Maximum number of rows read from a single listing page. */
    private static final int ROWS_PER_PAGE = 10;

    /** Position of the "next page" button among the {@code input.dbutton} elements. */
    private static final int NEXT_BUTTON_INDEX = 4;

    /**
     * Runs both scraping passes and prints the paired results to standard output.
     *
     * @param args unused
     * @throws SerializeException if a scraped record cannot be serialized to JSON
     */
    public static void main(String[] args) throws SerializeException {
        System.setProperty("webdriver.chrome.driver", "F:\\browser driver\\chromedriver.exe");

        List<Desc_Product> descriptions = scrapeDescriptions();
        List<Product> namesAndDates = scrapeNamesAndDates();

        JsonSerializer serializer = JsonSerializer.DEFAULT_READABLE;
        // Only print as many pairs as were actually scraped. The previous fixed
        // bound of 100 exceeded the 8 x 10 = 80 entries that can be collected at
        // most, so the loop always ended in an IndexOutOfBoundsException.
        int pairCount = Math.min(namesAndDates.size(), descriptions.size());
        for (int i = 0; i < pairCount; i++) {
            System.out.println(serializer.serialize(namesAndDates.get(i)));
            System.out.println(serializer.serialize(descriptions.get(i)));
            System.out.println("-----------------------------------------------");
        }
    }

    /**
     * Opens each detail link on every listing page and collects the text of the
     * first description article found there.
     *
     * @return one description per visited detail link, in visiting order
     */
    private static List<Desc_Product> scrapeDescriptions() {
        List<Desc_Product> descriptions = new ArrayList<>();
        WebDriver driver = new ChromeDriver();
        try {
            driver.get(DECISIONS_URL);
            for (int page = 0; page < PAGE_COUNT; page++) {
                for (int row = 0; row < ROWS_PER_PAGE; row++) {
                    // The links are looked up again on every iteration, as before;
                    // presumably handles obtained before navigating away can no
                    // longer be used afterwards - TODO confirm.
                    List<WebElement> links = driver.findElements(
                            By.cssSelector("a[id*='PageContents_LVDecisions_ctrl']"));
                    if (row >= links.size()) {
                        break; // this page has fewer rows than ROWS_PER_PAGE
                    }
                    links.get(row).click();

                    List<WebElement> articles = driver.findElements(
                            By.cssSelector("article.br5.bgaliceblue.alignjustify"));
                    if (articles.isEmpty()) {
                        // Keep the list index-aligned with the name/date list so
                        // the pairing in main() does not shift.
                        System.err.println("No description found for row " + row
                                + " on page " + (page + 1));
                        descriptions.add(new Desc_Product(""));
                    } else {
                        descriptions.add(new Desc_Product(articles.get(0).getText()));
                    }
                    // Always return to the listing, even when no article was found.
                    driver.navigate().back();
                }
                if (!clickNextPage(driver)) {
                    break;
                }
            }
        } finally {
            driver.quit(); // always release the browser process
        }
        return descriptions;
    }

    /**
     * Reads the name cell and the date span of every row on every listing page.
     *
     * @return one {@link Product} per row that could be read, in page order
     */
    private static List<Product> scrapeNamesAndDates() {
        List<Product> products = new ArrayList<>();
        WebDriver driver = new ChromeDriver();
        try {
            driver.get(DECISIONS_URL);
            for (int page = 0; page < PAGE_COUNT; page++) {
                List<WebElement> nameCells =
                        driver.findElements(By.cssSelector("td.widthtd75"));
                List<WebElement> dateSpans = driver.findElements(
                        By.cssSelector("span[id*='PageContents_LVDecisions_ctrl']"));
                int rowCount = Math.min(ROWS_PER_PAGE,
                        Math.min(nameCells.size(), dateSpans.size()));
                for (int row = 0; row < rowCount; row++) {
                    try {
                        products.add(new Product(
                                nameCells.get(row).getText(),
                                dateSpans.get(row).getText()));
                    } catch (RuntimeException e) {
                        // Best effort, as before, but no longer silent.
                        System.err.println("Skipping row " + row + " on page "
                                + (page + 1) + ": " + e);
                    }
                }
                if (!clickNextPage(driver)) {
                    break;
                }
            }
        } finally {
            driver.quit(); // always release the browser process
        }
        return products;
    }

    /**
     * Clicks the "next page" button of the listing.
     *
     * @param driver session currently showing a listing page
     * @return {@code false} if the button was not present, {@code true} otherwise
     */
    private static boolean clickNextPage(WebDriver driver) {
        List<WebElement> buttons = driver.findElements(By.cssSelector("input.dbutton"));
        if (buttons.size() <= NEXT_BUTTON_INDEX) {
            return false;
        }
        buttons.get(NEXT_BUTTON_INDEX).click();
        return true;
    }
}
Description class:
package com.company;
import org.apache.juneau.annotation.BeanConstructor;
/**
 * Bean holding the description text of a single scraped entry.
 */
public class Desc_Product {

    private String description;

    /**
     * Creates a bean with the given description.
     *
     * @param description description text; stored as given
     */
    @BeanConstructor(properties = "description")
    public Desc_Product(String description) {
        this.description = description;
    }

    /**
     * @param description new description text; stored as given
     */
    public void setDescription(String description) {
        this.description = description;
    }

    /**
     * @return the stored description text
     */
    public String getDescription() {
        return description;
    }

    /**
     * @return a space and a line break, followed by {@code description :} and the stored text
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder(" \n");
        text.append("description :");
        text.append(description);
        return text.toString();
    }
}
Product class:
package com.company;
import org.apache.juneau.annotation.BeanConstructor;
import org.openqa.selenium.WebElement;
/**
 * Bean holding the name, date and (optionally) the description of a single
 * scraped entry.
 */
public class Product {

    private String names;
    private String dates;
    private String description;

    /** Creates an empty bean; all properties start as {@code null}. */
    public Product() {
    }

    /**
     * Creates a bean with a name and a date; the description stays {@code null}.
     *
     * @param names name text of the entry
     * @param dates date text of the entry
     */
    @BeanConstructor(properties = "names,dates")
    public Product(String names, String dates) {
        this.names = names;
        this.dates = dates;
    }

    /**
     * @return the stored name, or {@code null} if none was set
     */
    public String getNames() {
        // Return the field as-is: String.valueOf(names) would turn a missing
        // name into the literal text "null", which is indistinguishable from
        // real data and inconsistent with the other getters.
        return names;
    }

    /**
     * @param names new name text
     */
    public void setNames(String names) {
        this.names = names;
    }

    /**
     * @return the stored date, or {@code null} if none was set
     */
    public String getDates() {
        return dates;
    }

    /**
     * @param dates new date text
     */
    public void setDates(String dates) {
        this.dates = dates;
    }

    /**
     * @return the stored description, or {@code null} if none was set
     */
    public String getDescription() {
        return description;
    }

    /**
     * @param description new description text
     */
    public void setDescription(String description) {
        this.description = description;
    }

    /**
     * @return the name and the date on two labelled lines; the description is not included
     */
    @Override
    public String toString() {
        return "name :" + names + "\n"
                + "dates :" + dates;
    }
}
Solution 1:[1]
I don't have access to the Desc_Product or Product classes. I tried to recreate your program anyways. When trying to recreate it, I managed to print out the description on each page. Here is my recreation.
Is there a reason you need to use the serializer, or do you only need to scrape the pages for their descriptions? If you only need the descriptions, check the example I linked and let me know what you think. (I would have posted all this as a comment, but I don't have enough reputation to comment yet.)
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Marko Delic |
