-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathCrawl_EntryPage.java
86 lines (68 loc) · 2.22 KB
/
Crawl_EntryPage.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package focus;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.logging.Level;
import java.util.logging.Logger;
import javafx.scene.control.TextArea;
/**
 * Crawls a single entry page: asks a {@code Crawl_URL} helper for the URL
 * paths of the entry page and normalizes them (first character dropped,
 * entries of one character or less discarded).
 *
 * @author rakesh
 */
public class Crawl_EntryPage {
    String entryPageURL;
    Filter filter;
    TextArea progressText,pagesCounterText;

    /**
     * Stores the collaborators that are later handed to {@code Crawl_URL}.
     *
     * @param text_area text area kept as {@code progressText}
     *                  (presumably shows crawl progress - confirm against Crawl_URL)
     * @param url       URL of the entry page to crawl
     * @param f         filter forwarded to {@code Crawl_URL}
     * @param counter   text area kept as {@code pagesCounterText}
     *                  (presumably shows the pages counter - confirm against Crawl_URL)
     */
    public Crawl_EntryPage(TextArea text_area,String url,Filter f,TextArea counter)
    {
        this.progressText=text_area;
        this.entryPageURL=url;
        filter=f;
        this.pagesCounterText=counter;
    }

    /**
     * Fetches the URL paths of the entry page and normalizes them in place.
     * <p>
     * Every element longer than one character loses its first character
     * (assumed to be the leading slash of the path - it is removed
     * unconditionally). Elements that are {@code null}, empty, or exactly
     * one character long are removed from the list.
     *
     * @return the same list instance returned by {@code Crawl_URL.get_url_paths},
     *         normalized as described above; {@code null} if that call returned
     *         {@code null} or failed with an {@code IOException} or
     *         {@code SQLException} (the exception is logged, not rethrown)
     * @throws IOException            declared for callers; exceptions raised by
     *                                {@code get_url_paths} are caught and logged,
     *                                so this can only escape from the
     *                                {@code Crawl_URL} construction
     * @throws ClassNotFoundException declared for callers; presumably raised by
     *                                {@code Crawl_URL} - not visible in this file
     * @throws SQLException           same propagation rule as {@code IOException}
     */
    public ArrayList<String> crawl_page() throws IOException, ClassNotFoundException, SQLException
    {
        // Constructed outside the try block on purpose: failures here propagate to the caller.
        Crawl_URL obj=new Crawl_URL(progressText,entryPageURL,filter,pagesCounterText);
        try {
            ArrayList<String> str=obj.get_url_paths(entryPageURL);
            if(str==null)
            {
                // Nothing to normalize; keep the existing "null means failure" contract
                // instead of crashing with a NullPointerException below.
                return null;
            }
            for(int j=0;j<str.size();j++){
                String i=str.get(j);
                if(i!=null && i.length()>1)
                {
                    // Drop the first character (the leading slash of the path).
                    str.set(j, i.substring(1));
                }
                else
                {
                    // null, empty and 1-char elements are blanked so that the
                    // single removeAll call below discards them all at once.
                    str.set(j,"");
                }
            }
            str.removeAll(Arrays.asList("", null));
            return str;
        } catch (IOException | SQLException ex) {
            Logger.getLogger(Crawl_EntryPage.class.getName())
                    .log(Level.SEVERE, "Failed to crawl entry page: " + entryPageURL, ex);
        }
        // Reached only after a logged failure.
        return null;
    }
}