00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 package dochelper;
00011
00012 import dochelper.exceptions.IllegalParamterTypeException;
00013 import dochelper.exceptions.InitializationException;
00014 import dochelper.exceptions.InvalidValueException;
00015 import dochelper.exceptions.MissingArgumentException;
00016 import dochelper.exceptions.ResourceUnavailableException;
00017 import java.util.ArrayList;
00018 import java.util.HashMap;
00019
00020 import java.util.regex.Pattern;
00021 import java.util.regex.Matcher;
00022 import java.util.regex.PatternSyntaxException;
00023
00024 import java.net.URI;
00025
00037 public class URLRecordSet implements RecordSet {
00038
00042 public static final String ROOT_URL = "rooturl";
00043
00047 public static final String DEPTH = "depth";
00048
00052 protected HashMap<String, Object> params = new HashMap<String, Object>();
00053
00057 protected ArrayList<Record> rsfiles;
00058
00062 protected String name;
00063
00067 protected int recordSetDepth;
00068
00072 protected Pattern links = Pattern.compile("a href=\"([.]+)\"");
00073
00074
00075
00076 private ArrayList<DocPattern> patterns;
00077
00091 public void setParam(String paramName, Object value) throws IllegalParamterTypeException {
00092
00093 if (value instanceof String || value instanceof DocumentSection)
00094 params.put(paramName, value);
00095 else
00096 throw new IllegalParamterTypeException(paramName);
00097 }
00098
00110 public void checkParams() throws InvalidValueException, MissingArgumentException, ResourceUnavailableException {
00111
00112
00113 if (!params.containsKey(ROOT_URL))
00114 throw new MissingArgumentException(ROOT_URL);
00115
00116
00117
00118
00119 String url = (String)params.get(ROOT_URL);
00120
00121
00122
00123
00124
00125 if (!params.containsKey(DEPTH))
00126 throw new MissingArgumentException(DEPTH);
00127
00128 }
00129
00136 public void init() throws InitializationException, ResourceUnavailableException {
00137
00138
00139 recordSetDepth = Integer.parseInt((String)params.get(DEPTH));
00140
00141
00142 rsfiles = new ArrayList<Record>();
00143
00144
00145 fetchRecords(0, (String)params.get(ROOT_URL));
00146
00147 }
00148
00161 private void fetchRecords(int depth, String rootURL) {
00162
00163 depth++;
00164
00165
00166 if(depth >recordSetDepth)
00167 return;
00168
00169
00170 URLRecord record = null;
00171
00172
00173 try {
00174 record = new URLRecord(rootURL, "URLRec");
00175 }
00176 catch(InvalidValueException ive) {
00177
00178 }
00179
00180
00181 if(!rsfiles.contains(record))
00182 rsfiles.add(record);
00183
00184
00185 Matcher match = links.matcher(record.getValue());
00186
00187 while(match.find()) {
00188
00189 fetchRecords(depth, match.group(1));
00190 }
00191 }
00192
00211 public ResultNode execute(ResultNode doc) throws ResourceUnavailableException {
00212
00213
00214 ResultNode result = new ResultNode(null, params, this.getName());
00215
00216
00217 for (Record record : rsfiles) {
00218
00219 result.addChild(record.execute(patterns));
00220 }
00221
00222 doc.addChild(result);
00223
00224 return doc;
00225 }
00226
00232 public String getName() {
00233 return name;
00234 }
00235
00236 public void setName(String name) {
00237 this.name = name;
00238 }
00239
00246 public void addPattern(DocPattern pattern) {
00247
00248 if (this.patterns == null)
00249 this.patterns = new ArrayList<DocPattern>();
00250
00251 this.patterns.add(pattern);
00252 }
00253
00258 public void clearPatterns() {
00259 this.patterns = new ArrayList<DocPattern>();
00260 }
00261
00262
00263 public String toString() {
00264
00265 String out = "";
00266
00267 out += "Files Count : " + rsfiles.size();
00268
00269
00270 return out;
00271 }
00272
00273 }