* Added pruning of Sucker's history (it used to grow indefinitely).
This commit is contained in:
@ -46,9 +46,6 @@ public class Sucker {
|
|||||||
|
|
||||||
public Sucker() {}
|
public Sucker() {}
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructor for BlogManager.
|
|
||||||
*/
|
|
||||||
public Sucker(String[] strings) throws IllegalArgumentException {
|
public Sucker(String[] strings) throws IllegalArgumentException {
|
||||||
SuckerState state = new SuckerState();
|
SuckerState state = new SuckerState();
|
||||||
state.pushToSyndie=true;
|
state.pushToSyndie=true;
|
||||||
@ -75,6 +72,7 @@ public class Sucker {
|
|||||||
state.user = state.bm.getUser(blogHash);
|
state.user = state.bm.getUser(blogHash);
|
||||||
if(state.user==null)
|
if(state.user==null)
|
||||||
throw new IllegalArgumentException("wtf, user==null? hash:"+blogHash);
|
throw new IllegalArgumentException("wtf, user==null? hash:"+blogHash);
|
||||||
|
state.history = new ArrayList();
|
||||||
_state = state;
|
_state = state;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -162,11 +160,6 @@ public class Sucker {
|
|||||||
|
|
||||||
_log.debug("message number: " + _state.messageNumber);
|
_log.debug("message number: " + _state.messageNumber);
|
||||||
|
|
||||||
// Create historyFile if missing
|
|
||||||
_state.historyFile = new File(_state.historyPath);
|
|
||||||
if (!_state.historyFile.exists())
|
|
||||||
_state.historyFile.createNewFile();
|
|
||||||
|
|
||||||
_state.shouldProxy = false;
|
_state.shouldProxy = false;
|
||||||
_state.proxyPortNum = -1;
|
_state.proxyPortNum = -1;
|
||||||
if ( (_state.proxyHost != null) && (_state.proxyPort != null) ) {
|
if ( (_state.proxyHost != null) && (_state.proxyPort != null) ) {
|
||||||
@ -194,7 +187,7 @@ public class Sucker {
|
|||||||
boolean ok = lsnr.waitForSuccess();
|
boolean ok = lsnr.waitForSuccess();
|
||||||
if (!ok) {
|
if (!ok) {
|
||||||
_log.debug("success? " + ok);
|
_log.debug("success? " + ok);
|
||||||
System.err.println("Unable to retrieve the url after " + numRetries + " tries.");
|
System.err.println("Unable to retrieve the url [" + _state.urlToLoad + "] after " + numRetries + " tries.");
|
||||||
fetched.delete();
|
fetched.delete();
|
||||||
return _state.entriesPosted;
|
return _state.entriesPosted;
|
||||||
}
|
}
|
||||||
@ -213,31 +206,56 @@ public class Sucker {
|
|||||||
|
|
||||||
_log.debug("entries: " + entries.size());
|
_log.debug("entries: " + entries.size());
|
||||||
|
|
||||||
FileOutputStream hos = null;
|
loadHistory();
|
||||||
|
|
||||||
|
// Process list backwards to get syndie to display the
|
||||||
|
// entries in the right order. (most recent at top)
|
||||||
|
List feedMessageIds = new ArrayList();
|
||||||
|
for (int i = entries.size()-1; i >= 0; i--) {
|
||||||
|
SyndEntry e = (SyndEntry) entries.get(i);
|
||||||
|
|
||||||
try {
|
_state.attachmentCounter=0;
|
||||||
hos = new FileOutputStream(_state.historyFile, true);
|
|
||||||
|
if (_log.shouldLog(Log.DEBUG))
|
||||||
|
_log.debug("Syndicate entry: " + e.getLink());
|
||||||
|
|
||||||
|
// Calculate messageId, and check if we have got the message already
|
||||||
|
String feedHash = sha1(_state.urlToLoad);
|
||||||
|
String itemHash = sha1(e.getTitle() + e.getDescription());
|
||||||
|
Date d = e.getPublishedDate();
|
||||||
|
String time;
|
||||||
|
if(d!=null)
|
||||||
|
time = "" + d.getTime();
|
||||||
|
else
|
||||||
|
time = "" + new Date().getTime();
|
||||||
|
String outputFileName = _state.outputDir + "/" + _state.messageNumber;
|
||||||
|
String messageId = feedHash + ":" + itemHash + ":" + time + ":" + outputFileName;
|
||||||
|
|
||||||
// Process list backwards to get syndie to display the
|
// Make sure these messageIds get into the history file
|
||||||
// entries in the right order. (most recent at top)
|
feedMessageIds.add(messageId);
|
||||||
for (int i = entries.size()-1; i >= 0; i--) {
|
|
||||||
SyndEntry e = (SyndEntry) entries.get(i);
|
// Check if we already have this
|
||||||
|
if (existsInHistory(_state, messageId))
|
||||||
_state.attachmentCounter=0;
|
continue;
|
||||||
|
|
||||||
if (_log.shouldLog(Log.DEBUG))
|
infoLog("new: " + messageId);
|
||||||
_log.debug("Syndicate entry: " + e.getLink());
|
|
||||||
|
// process the new entry
|
||||||
String messageId = convertToSml(_state, e);
|
processEntry(_state, e, time);
|
||||||
if (messageId!=null) {
|
|
||||||
hos.write(messageId.getBytes());
|
|
||||||
hos.write("\n".getBytes());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
if (hos != null) try { hos.close(); } catch (IOException ioe) {}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// update history
|
||||||
|
pruneHistory(_state.urlToLoad, 42*10); // could use 0 if we were sure old entries never re-appear
|
||||||
|
Iterator iter = feedMessageIds.iterator();
|
||||||
|
while(iter.hasNext())
|
||||||
|
{
|
||||||
|
String newMessageId = (String)iter.next();
|
||||||
|
if(!existsInHistory(_state, newMessageId))
|
||||||
|
addHistory(newMessageId); // add new message ids from current feed to history
|
||||||
|
}
|
||||||
|
storeHistory();
|
||||||
|
|
||||||
|
// call script if we don't just feed syndie
|
||||||
if(!_state.pushToSyndie) {
|
if(!_state.pushToSyndie) {
|
||||||
FileOutputStream fos = null;
|
FileOutputStream fos = null;
|
||||||
try {
|
try {
|
||||||
@ -264,6 +282,111 @@ public class Sucker {
|
|||||||
return _state.entriesPosted;
|
return _state.entriesPosted;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void loadHistory() {
|
||||||
|
try {
|
||||||
|
// Create historyFile if missing
|
||||||
|
_state.historyFile = new File(_state.historyPath);
|
||||||
|
if (!_state.historyFile.exists())
|
||||||
|
_state.historyFile.createNewFile();
|
||||||
|
|
||||||
|
FileInputStream is = new FileInputStream(_state.historyFile);
|
||||||
|
String s;
|
||||||
|
while((s=readLine(is))!=null)
|
||||||
|
{
|
||||||
|
addHistory(s);
|
||||||
|
}
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
// TODO Auto-generated catch block
|
||||||
|
e.printStackTrace();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// TODO Auto-generated catch block
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean existsInHistory(SuckerState state, String messageId) {
|
||||||
|
int idx;
|
||||||
|
idx = messageId.lastIndexOf(":");
|
||||||
|
String lineToCompare = messageId.substring(0, idx-1);
|
||||||
|
idx = lineToCompare.lastIndexOf(":");
|
||||||
|
lineToCompare = lineToCompare.substring(0, idx-1);
|
||||||
|
Iterator iter = _state.history.iterator();
|
||||||
|
while(iter.hasNext())
|
||||||
|
{
|
||||||
|
String line = (String)iter.next();
|
||||||
|
idx = line.lastIndexOf(":");
|
||||||
|
if (idx < 0)
|
||||||
|
return false;
|
||||||
|
line = line.substring(0, idx-1);
|
||||||
|
idx = line.lastIndexOf(":");
|
||||||
|
if (idx < 0)
|
||||||
|
return false;
|
||||||
|
line = line.substring(0, idx-1);
|
||||||
|
if (line.equals(lineToCompare))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addHistory(String messageId) {
|
||||||
|
_state.history.add(messageId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void pruneHistory(String url, int nrToKeep) {
|
||||||
|
int i=0;
|
||||||
|
String urlHash=sha1(url);
|
||||||
|
|
||||||
|
// Count nr of entries containing url hash
|
||||||
|
Iterator iter = _state.history.iterator();
|
||||||
|
while(iter.hasNext())
|
||||||
|
{
|
||||||
|
String historyLine = (String) iter.next();
|
||||||
|
if(historyLine.startsWith(urlHash))
|
||||||
|
{
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// keep first nrToKeep entries
|
||||||
|
i = i - nrToKeep;
|
||||||
|
if(i>0)
|
||||||
|
{
|
||||||
|
iter = _state.history.iterator();
|
||||||
|
while(i>0 && iter.hasNext())
|
||||||
|
{
|
||||||
|
String historyLine = (String) iter.next();
|
||||||
|
if(historyLine.startsWith(urlHash))
|
||||||
|
{
|
||||||
|
iter.remove();
|
||||||
|
i--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void storeHistory() {
|
||||||
|
FileOutputStream hos = null;
|
||||||
|
try {
|
||||||
|
hos = new FileOutputStream(_state.historyFile, false);
|
||||||
|
Iterator iter = _state.history.iterator();
|
||||||
|
while(iter.hasNext())
|
||||||
|
{
|
||||||
|
String historyLine = (String) iter.next();
|
||||||
|
hos.write(historyLine.getBytes());
|
||||||
|
hos.write("\n".getBytes());
|
||||||
|
}
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
// TODO Auto-generated catch block
|
||||||
|
e.printStackTrace();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// TODO Auto-generated catch block
|
||||||
|
e.printStackTrace();
|
||||||
|
} finally {
|
||||||
|
if (hos != null) try { hos.close(); } catch (IOException ioe) {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
Sucker sucker = new Sucker();
|
Sucker sucker = new Sucker();
|
||||||
boolean ok = sucker.parseArgs(args);
|
boolean ok = sucker.parseArgs(args);
|
||||||
@ -288,8 +411,6 @@ public class Sucker {
|
|||||||
*/
|
*/
|
||||||
private static boolean execPushScript(SuckerState state, String id, String time) {
|
private static boolean execPushScript(SuckerState state, String id, String time) {
|
||||||
try {
|
try {
|
||||||
String ls_str;
|
|
||||||
|
|
||||||
String cli = state.pushScript + " " + state.outputDir + " " + id + " " + time;
|
String cli = state.pushScript + " " + state.outputDir + " " + id + " " + time;
|
||||||
Process pushScript_proc = Runtime.getRuntime().exec(cli);
|
Process pushScript_proc = Runtime.getRuntime().exec(cli);
|
||||||
|
|
||||||
@ -327,28 +448,11 @@ public class Sucker {
|
|||||||
/**
|
/**
|
||||||
* Converts the SyndEntry e to sml and fetches any images as attachments
|
* Converts the SyndEntry e to sml and fetches any images as attachments
|
||||||
*/
|
*/
|
||||||
private static String convertToSml(SuckerState state, SyndEntry e) {
|
private static boolean processEntry(SuckerState state, SyndEntry e, String time) {
|
||||||
String subject;
|
String subject;
|
||||||
|
|
||||||
state.stripNewlines=false;
|
state.stripNewlines=false;
|
||||||
|
|
||||||
// Calculate messageId, and check if we have got the message already
|
|
||||||
String feedHash = sha1(state.urlToLoad);
|
|
||||||
String itemHash = sha1(e.getTitle() + e.getDescription());
|
|
||||||
Date d = e.getPublishedDate();
|
|
||||||
String time;
|
|
||||||
if(d!=null)
|
|
||||||
time = "" + d.getTime();
|
|
||||||
else
|
|
||||||
time = "" + new Date().getTime();
|
|
||||||
String outputFileName = state.outputDir + "/" + state.messageNumber;
|
|
||||||
String messageId = feedHash + ":" + itemHash + ":" + time + ":" + outputFileName;
|
|
||||||
// Check if we already have this
|
|
||||||
if (existsInHistory(state, messageId))
|
|
||||||
return null;
|
|
||||||
|
|
||||||
infoLog("new: " + messageId);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
String sml="";
|
String sml="";
|
||||||
@ -370,7 +474,6 @@ public class Sucker {
|
|||||||
List l = e.getContents();
|
List l = e.getContents();
|
||||||
if(l!=null)
|
if(l!=null)
|
||||||
{
|
{
|
||||||
debugLog("There is content");
|
|
||||||
iter = l.iterator();
|
iter = l.iterator();
|
||||||
while(iter.hasNext())
|
while(iter.hasNext())
|
||||||
{
|
{
|
||||||
@ -402,8 +505,8 @@ public class Sucker {
|
|||||||
}
|
}
|
||||||
|
|
||||||
String source=e.getLink(); //Uri();
|
String source=e.getLink(); //Uri();
|
||||||
if(source.indexOf("http")<0)
|
if(!source.startsWith("http://"))
|
||||||
source=state.baseUrl+source;
|
source=state.baseUrl+source;
|
||||||
sml += "[link schema=\"web\" location=\""+source+"\"]source[/link]\n";
|
sml += "[link schema=\"web\" location=\""+source+"\"]source[/link]\n";
|
||||||
|
|
||||||
if(state.pushToSyndie) {
|
if(state.pushToSyndie) {
|
||||||
@ -426,7 +529,7 @@ public class Sucker {
|
|||||||
|
|
||||||
if(uri==null) {
|
if(uri==null) {
|
||||||
errorLog("pushToSyndie failure.");
|
errorLog("pushToSyndie failure.");
|
||||||
return null;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
state.entriesPosted.add(uri);
|
state.entriesPosted.add(uri);
|
||||||
infoLog("pushToSyndie success, uri: "+uri.toString());
|
infoLog("pushToSyndie success, uri: "+uri.toString());
|
||||||
@ -448,14 +551,14 @@ public class Sucker {
|
|||||||
}
|
}
|
||||||
state.messageNumber++;
|
state.messageNumber++;
|
||||||
deleteTempFiles(state);
|
deleteTempFiles(state);
|
||||||
return messageId;
|
return true;
|
||||||
} catch (FileNotFoundException e1) {
|
} catch (FileNotFoundException e1) {
|
||||||
e1.printStackTrace();
|
e1.printStackTrace();
|
||||||
} catch (IOException e2) {
|
} catch (IOException e2) {
|
||||||
e2.printStackTrace();
|
e2.printStackTrace();
|
||||||
}
|
}
|
||||||
deleteTempFiles(state);
|
deleteTempFiles(state);
|
||||||
return null;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void deleteTempFiles(SuckerState state) {
|
private static void deleteTempFiles(SuckerState state) {
|
||||||
@ -570,7 +673,7 @@ public class Sucker {
|
|||||||
|
|
||||||
ret+="[/img]";
|
ret+="[/img]";
|
||||||
|
|
||||||
if(imageLink.indexOf("http")<0)
|
if(!imageLink.startsWith("http://"))
|
||||||
imageLink=state.baseUrl+"/"+imageLink;
|
imageLink=state.baseUrl+"/"+imageLink;
|
||||||
|
|
||||||
fetchAttachment(state, imageLink);
|
fetchAttachment(state, imageLink);
|
||||||
@ -592,7 +695,7 @@ public class Sucker {
|
|||||||
if (b >= htmlTagLowerCase.length())
|
if (b >= htmlTagLowerCase.length())
|
||||||
return null; // abort the b0rked tag
|
return null; // abort the b0rked tag
|
||||||
String link=htmlTag.substring(a,b);
|
String link=htmlTag.substring(a,b);
|
||||||
if(link.indexOf("http")<0)
|
if(!link.startsWith("http://"))
|
||||||
link=state.baseUrl+"/"+link;
|
link=state.baseUrl+"/"+link;
|
||||||
|
|
||||||
String schema="web";
|
String schema="web";
|
||||||
@ -613,6 +716,7 @@ public class Sucker {
|
|||||||
state.pendingEndLink=false;
|
state.pendingEndLink=false;
|
||||||
return "[/link]";
|
return "[/link]";
|
||||||
}
|
}
|
||||||
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
if("<b>".equals(htmlTagLowerCase))
|
if("<b>".equals(htmlTagLowerCase))
|
||||||
@ -645,8 +749,21 @@ public class Sucker {
|
|||||||
return "";
|
return "";
|
||||||
if("</br>".equals(htmlTagLowerCase))
|
if("</br>".equals(htmlTagLowerCase))
|
||||||
return "";
|
return "";
|
||||||
if(htmlTagLowerCase.startsWith("<table") || "</table>".equals(htmlTagLowerCase)) // emulate table with hr
|
if(htmlTagLowerCase.startsWith("<hr"))
|
||||||
|
return "";
|
||||||
|
if("</img>".equals(htmlTagLowerCase))
|
||||||
|
return "";
|
||||||
|
if("</font>".equals(htmlTagLowerCase))
|
||||||
|
return "";
|
||||||
|
if("<blockquote>".equals(htmlTagLowerCase))
|
||||||
|
return "[quote]";
|
||||||
|
if("</blockquote>".equals(htmlTagLowerCase))
|
||||||
|
return "[/quote]";
|
||||||
|
if(htmlTagLowerCase.startsWith("<table") || "</table>".equals(htmlTagLowerCase)) // emulate table with hr :)
|
||||||
return "[hr][/hr]";
|
return "[hr][/hr]";
|
||||||
|
if(htmlTagLowerCase.startsWith("<font"))
|
||||||
|
return "";
|
||||||
|
|
||||||
|
|
||||||
for(int i=0;i<ignoreTags.length;i++) {
|
for(int i=0;i<ignoreTags.length;i++) {
|
||||||
String openTag = "<"+ignoreTags[i];
|
String openTag = "<"+ignoreTags[i];
|
||||||
@ -754,36 +871,6 @@ public class Sucker {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean existsInHistory(SuckerState state, String messageId) {
|
|
||||||
int idx;
|
|
||||||
idx = messageId.lastIndexOf(":");
|
|
||||||
String lineToCompare = messageId.substring(0, idx-1);
|
|
||||||
idx = lineToCompare.lastIndexOf(":");
|
|
||||||
lineToCompare = lineToCompare.substring(0, idx-1);
|
|
||||||
FileInputStream his = null;
|
|
||||||
try {
|
|
||||||
his = new FileInputStream(state.historyFile);
|
|
||||||
String line;
|
|
||||||
while ((line = readLine(his)) != null) {
|
|
||||||
idx = line.lastIndexOf(":");
|
|
||||||
if (idx < 0)
|
|
||||||
return false;
|
|
||||||
line = line.substring(0, idx-1);
|
|
||||||
idx = line.lastIndexOf(":");
|
|
||||||
if (idx < 0)
|
|
||||||
return false;
|
|
||||||
line = line.substring(0, idx-1);
|
|
||||||
if (line.equals(lineToCompare))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
} finally {
|
|
||||||
if (his != null) try { his.close(); } catch (IOException ioe) {}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String sha1(String s) {
|
private static String sha1(String s) {
|
||||||
try {
|
try {
|
||||||
MessageDigest md = MessageDigest.getInstance("SHA");
|
MessageDigest md = MessageDigest.getInstance("SHA");
|
||||||
@ -805,10 +892,10 @@ public class Sucker {
|
|||||||
c = in.read();
|
c = in.read();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
break;
|
return null;
|
||||||
}
|
}
|
||||||
if (c < 0)
|
if (c < 0)
|
||||||
break;
|
return null;
|
||||||
if (c == '\n')
|
if (c == '\n')
|
||||||
break;
|
break;
|
||||||
sb.append((char) c);
|
sb.append((char) c);
|
||||||
@ -897,6 +984,7 @@ class SuckerState {
|
|||||||
BlogManager bm;
|
BlogManager bm;
|
||||||
User user;
|
User user;
|
||||||
List entriesPosted;
|
List entriesPosted;
|
||||||
|
List history;
|
||||||
|
|
||||||
//
|
//
|
||||||
List fileNames;
|
List fileNames;
|
||||||
|
@ -17,13 +17,6 @@ import net.i2p.syndie.sml.*;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Schedule the import of atom/rss feeds
|
* Schedule the import of atom/rss feeds
|
||||||
*
|
|
||||||
* <p><h3>todo:</h3>
|
|
||||||
* <p>caching (eepget should do it)
|
|
||||||
* <p>enclosures support (requires cvs rome)
|
|
||||||
* <p>syndie.sucker.minHistory/maxHistory used to roll over the history file?
|
|
||||||
* <p>configurable update period
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class ImportFeedServlet extends BaseServlet {
|
public class ImportFeedServlet extends BaseServlet {
|
||||||
protected String getTitle() { return "Syndie :: Import feed"; }
|
protected String getTitle() { return "Syndie :: Import feed"; }
|
||||||
@ -80,7 +73,6 @@ public class ImportFeedServlet extends BaseServlet {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if ( (action != null) && ("Delete".equals(action)) ) {
|
} else if ( (action != null) && ("Delete".equals(action)) ) {
|
||||||
out.write("<span class=\"b_rssImportMsgErr\">Delete some thing</span><br />");
|
|
||||||
if (url == null || blog == null || tagPrefix == null) {
|
if (url == null || blog == null || tagPrefix == null) {
|
||||||
out.write("<span class=\"b_rssImportMsgErr\">error, some fields were empty.</span><br />");
|
out.write("<span class=\"b_rssImportMsgErr\">error, some fields were empty.</span><br />");
|
||||||
} else {
|
} else {
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
$Id: history.txt,v 1.406 2006/02/18 22:22:33 jrandom Exp $
|
$Id: history.txt,v 1.407 2006/02/19 07:29:57 jrandom Exp $
|
||||||
|
|
||||||
|
2006-02-19 dust
|
||||||
|
* Added pruning of Sucker's history (it used to grow indefinitely).
|
||||||
|
|
||||||
2006-02-19 jrandom
|
2006-02-19 jrandom
|
||||||
* Moved the current net's reseed URL to a different location than where
|
* Moved the current net's reseed URL to a different location than where
|
||||||
|
Reference in New Issue
Block a user