Note: need to run once before choosing a plugin
Note: need to run once before choosing a plugin
diff --git a/.classpath b/.classpath
index a2dae70..c05ebe4 100644
--- a/.classpath
+++ b/.classpath
@@ -1,20 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
- <classpathentry excluding="psl/crunch3/util/wrapperinduction/" output="output" kind="src" path="src"/>
- <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
- <classpathentry kind="lib" path="jars/swt.jar"/>
- <classpathentry kind="lib" path="jars/nekohtml-0.8.2/nekohtml.jar"/>
- <classpathentry kind="lib" path="jars/xerces-2_6_0/xercesImpl.jar"/>
- <classpathentry kind="lib" path="jars/xerces-2_6_0/xmlParserAPIs.jar"/>
+ <classpathentry kind="src" path="src"/>
<classpathentry kind="lib" path="jars/googleapi/googleapi.jar"/>
<classpathentry kind="lib" path="jars/jfreechart/jfreechart-0.9.21/jfreechart-0.9.21.jar"/>
- <classpathentry kind="lib" path="jars/jfreechart/jfreechart-0.9.21/lib/jcommon-0.9.6.jar"/>
- <classpathentry kind="lib" path="/usr/local/jakarta-tomcat-5.5.9/common/lib/servlet-api.jar"/>
- <classpathentry kind="lib" path="/usr/local/jakarta-tomcat-5.5.9/common/lib/jasper-runtime.jar"/>
- <classpathentry kind="lib" path="/usr/local/jakarta-tomcat-5.5.9/common/lib/jsp-api.jar"/>
- <classpathentry kind="lib" path="/usr/local/jakarta-tomcat-5.5.9/common/lib/jstl.jar"/>
- <classpathentry kind="lib" path="/usr/local/jakarta-tomcat-5.5.9/common/lib/standard.jar"/>
- <classpathentry kind="lib" path="/usr/local/jakarta-tomcat-5.5.9/common/lib/mysql-connector-java-3.1.10-bin.jar"/>
- <classpathentry kind="lib" path="/usr/local/jakarta-tomcat-5.5.9/common/lib/mm.mysql-2.0.13-bin.jar"/>
+ <classpathentry kind="lib" path="jars/jcommon-0.9.7/jcommon-0.9.7.jar"/>
+ <classpathentry kind="lib" path="jars/nekohtml-0.8.3/nekohtml.jar"/>
+ <classpathentry kind="lib" path="jars/xerces-2_6_0/xercesImpl.jar"/>
+ <classpathentry kind="lib" path="jars/xerces-2_6_0/xmlParserAPIs.jar"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
+ <classpathentry kind="lib" path="jars/swt.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>
diff --git a/.project b/.project
index 477132b..c85a936 100644
--- a/.project
+++ b/.project
@@ -10,11 +10,6 @@
<arguments>
</arguments>
</buildCommand>
- <buildCommand>
- <name>net.sourceforge.metrics.builder</name>
- <arguments>
- </arguments>
- </buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jem.workbench.JavaEMFNature</nature>
@@ -23,16 +18,4 @@
<nature>net.sourceforge.metrics.nature</nature>
<nature>com.sysdeo.eclipse.tomcat.tomcatnature</nature>
</natures>
- <linkedResources>
- <link>
- <name>output</name>
- <type>2</type>
- <location>/usr/local/jakarta-tomcat-5.5.9/webapps/crunch/WEB-INF/classes</location>
- </link>
- <link>
- <name>Crunch_WebApp</name>
- <type>2</type>
- <location>/usr/local/jakarta-tomcat-5.5.9/webapps/crunch</location>
- </link>
- </linkedResources>
</projectDescription>
diff --git a/config/custom.ini b/config/custom.ini
index 93be9a2..b382499 100644
--- a/config/custom.ini
+++ b/config/custom.ini
@@ -1,42 +1,42 @@
#Content Extractor Settings File
-#Wed Jul 27 20:08:57 EDT 2005
+#Wed Sep 21 15:21:53 EDT 2005
<A>\ tags\ are\ substance=true
<FORM>\ tags\ are\ substance=true
-Ignore\ Image\ Links=false
-Ignore\ Meta\ Tags=false
+Ignore\ Image\ Links=true
+Ignore\ Meta\ Tags=true
Display\ Image\ Link\ ALTs=true
-Ignore\ Text\ Links=true
+Ignore\ Text\ Links=false
<INPUT>\ tags\ are\ substance=true
Ignore\ <IFRAME>\ Tags=false
<BUTTON>\ tags\ are\ substance=true
Ignore\ All\ Advertisements=true
<SELECT>\ tags\ are\ substance=true
-Ignore\ External\ Stylesheets=false
+Ignore\ External\ Stylesheets=true
<IFRAME>\ tags\ are\ substance=true
Ignore\ <EMBED>\ tags=false
-Ignore\ Styles=false
+Ignore\ Styles=true
Ignore\ Only\ Links\ and\ Text\ in\ Link\ Lists=true
-Ignore\ <INPUT>\ Tags=false
-Ignore\ Flash=false
+Ignore\ <INPUT>\ Tags=true
+Ignore\ Flash=true
Ignore\ Image\ Links\ in\ Link\ Lists=true
<IMG>\ tags\ are\ substance=true
<TEXTAREA>\ tags\ are\ substance=true
Ignore\ Scripts=true
-Ignore\ Forms=false
+Ignore\ Forms=true
Display\ Image\ ALTs=true
Maximum\ Number\ of\ Line\ Breaks=2
-Ignore\ <BUTTON>\ Tags=false
+Ignore\ <BUTTON>\ Tags=true
Limit\ Number\ of\ Line\ Breaks=false
-Ignore\ Style\ Attribute\ in\ <DIV>\ Tags=false
+Ignore\ Style\ Attribute\ in\ <DIV>\ Tags=true
Ignore\ <NOSCRIPT>\ Tags=true
Add\ removed\ links\ to\ bottom\ of\ the\ page=false
Minimum\ text\ length\ as\ substance=12
Remove\ Empty\ Tables=false
Ignore\ Text\ Links\ in\ Link\ Lists=true
Ignore\ Table\ Cell\ Widths=false
-Ignore\ Style\ Attributes=false
-Ignore\ <SELECT>\ Tags=false
+Ignore\ Style\ Attributes=true
+Ignore\ <SELECT>\ Tags=true
Link/Text\ Removal\ Ratio=0.3
-Ignore\ Images=false
-Ignore\ Link\ Lists=true
+Ignore\ Images=true
+Ignore\ Link\ Lists=false
Print\ Only\ Text=false
diff --git a/src/psl/crunch3/Crunch3.java b/src/psl/crunch3/Crunch3.java
index d650d47..b6af387 100644
--- a/src/psl/crunch3/Crunch3.java
+++ b/src/psl/crunch3/Crunch3.java
@@ -38,11 +38,14 @@ public class Crunch3 {
Runtime.getRuntime().addShutdownHook(new ShutdownThread());
mainControl = new MainControl(Crunch3.settings.isGUISet());
- if(Crunch3.settings.isGUISet()) mainWindow = new MainWindow(mainControl);
+
proxy = new Proxy(settings.getListenPort());
- proxy.registerPlugin(new ContentExtractor());
+ if(Crunch3.settings.isGUISet()) mainWindow = mainControl.activateGUI();
+
+
+ proxy.registerPlugin(mainControl.getContentExtractor());
proxy.registerPlugin(new SamplePlugin());
proxy.registerPlugin(new SizeModifier());
diff --git a/src/psl/crunch3/Crunch3Settings.java b/src/psl/crunch3/Crunch3Settings.java
index df99a09..ae8f31f 100644
--- a/src/psl/crunch3/Crunch3Settings.java
+++ b/src/psl/crunch3/Crunch3Settings.java
@@ -29,7 +29,7 @@ public class Crunch3Settings {
public static final boolean PROXY_MODE_DEF = true;
public static String SETTINGS_FILE = "config/content extractor settings.ini";
public static final boolean CHECK_HOMEPAGE_DEF = false;
- public static final boolean RUN_ON_SERVER = true;
+ public static final boolean RUN_ON_SERVER = false;
//internal variables
private String[] arguments;
diff --git a/src/psl/crunch3/HttpStream.java b/src/psl/crunch3/HttpStream.java
index 8a1cd31..034ba32 100644
--- a/src/psl/crunch3/HttpStream.java
+++ b/src/psl/crunch3/HttpStream.java
@@ -108,7 +108,7 @@ public class HttpStream extends Thread {
try {
readClientFirstLine();
- Crunch3.mainControl.printStatus();
+ if(!Crunch3.settings.isGUISet())Crunch3.mainControl.printStatus();
} catch (Exception e) {
if (Crunch3.settings.isVerbose())
e.printStackTrace();
diff --git a/src/psl/crunch3/MainControl.java b/src/psl/crunch3/MainControl.java
index 4707cb4..057b110 100644
--- a/src/psl/crunch3/MainControl.java
+++ b/src/psl/crunch3/MainControl.java
@@ -36,9 +36,13 @@ public class MainControl extends Thread{
public void run(){
+ ce = new ContentExtractor();
+ description = ce.getControl();
+
if(!GUIActive){
menuLoop();
}
+ else yield();
}
@@ -52,6 +56,7 @@ public class MainControl extends Thread{
Scanner in = new Scanner(System.in);
ce = new ContentExtractor();
description = ce.getControl();
+
yield();
Crunch3.proxy.registerPlugin(ce);
@@ -177,17 +182,19 @@ public class MainControl extends Thread{
/**
* Activate the GUI
*/
- private void activateGUI(){
+ public MainWindow activateGUI(){
GUIActive = true;
Crunch3.settings.setGUI(true);
- Crunch3.mainWindow = new MainWindow(this);
+ MainWindow mw = new MainWindow(this);
- Crunch3.proxy.registerPlugin(new SamplePlugin());
- Crunch3.proxy.registerPlugin(new SizeModifier());
- Crunch3.proxy.registerPlugin(ce);
+ return mw;
}
+ public ContentExtractor getContentExtractor(){
+ return ce;
+ }
+
}
diff --git a/src/psl/crunch3/plugins/contentextractor/ContentExtractor.java b/src/psl/crunch3/plugins/contentextractor/ContentExtractor.java
index 2859b26..9522d53 100644
--- a/src/psl/crunch3/plugins/contentextractor/ContentExtractor.java
+++ b/src/psl/crunch3/plugins/contentextractor/ContentExtractor.java
@@ -170,9 +170,10 @@ public class ContentExtractor extends EnhancedProxyFilter implements SiteDepende
Document newTree;
InputStream in;
+ boolean getNext = false;
try{
- while((linkToAppend !=null) && (linkToAppend != address) ){
+ while((linkToAppend !=null) && (linkToAppend != address) && getNext ){
System.out.println("*** " + linkToAppend);