# Latest version of the web app
#Content Extractor Settings File
#Thu Feb 10 18:55:25 EST 2005
<A>\ tags\ are\ substance=true
<FORM>\ tags\ are\ substance=true
Ignore\ Image\ Links=false
Ignore\ Meta\ Tags=true
Display\ Image\ Link\ ALTs=true
Ignore\ Text\ Links=false
<INPUT>\ tags\ are\ substance=true
Ignore\ <IFRAME>\ Tags=true
<BUTTON>\ tags\ are\ substance=true
Ignore\ All\ Advertisements=true
<SELECT>\ tags\ are\ substance=true
Ignore\ External\ Stylesheets=false
<IFRAME>\ tags\ are\ substance=true
Ignore\ <EMBED>\ tags=true
Ignore\ Styles=false
Ignore\ Only\ Links\ and\ Text\ in\ Link\ Lists=true
Ignore\ <INPUT>\ Tags=true
Ignore\ Flash=true
Ignore\ Image\ Links\ in\ Link\ Lists=true
<IMG>\ tags\ are\ substance=true
<TEXTAREA>\ tags\ are\ substance=true
Ignore\ Scripts=true
Ignore\ Forms=false
Display\ Image\ ALTs=true
Maximum\ Number\ of\ Line\ Breaks=2
Ignore\ <BUTTON>\ Tags=false
Limit\ Number\ of\ Line\ Breaks=false
Ignore\ Style\ Attribute\ in\ <DIV>\ Tags=false
Ignore\ <NOSCRIPT>\ Tags=true
Add\ removed\ links\ to\ bottom\ of\ the\ page=false
Minimum\ text\ length\ as\ substance=12
Remove\ Empty\ Tables=true
Ignore\ Text\ Links\ in\ Link\ Lists=true
Ignore\ Table\ Cell\ Widths=true
Ignore\ Style\ Attributes=false
Ignore\ <SELECT>\ Tags=true
Ignore\ Link\ Lists=true
Link/Text\ Removal\ Ratio=0.5
Ignore\ Images=false
Print\ Only\ Text=false