commit fffb91447a78fc0bd7ebb7b842dc53afcc769fd7 Author: orbiter Date: Wed Dec 1 14:55:40 2010 +0000 fixed crawl queue delete function git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7357 6c8d7289-2bf4-0310-a012-ef5d649a1542 diff --git a/htroot/IndexCreateWWWLocalQueue_p.java b/htroot/IndexCreateWWWLocalQueue_p.java index 2cc1b5c..55f2c42 100644 --- a/htroot/IndexCreateWWWLocalQueue_p.java +++ b/htroot/IndexCreateWWWLocalQueue_p.java @@ -31,9 +31,9 @@ import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.Iterator; +import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -114,6 +114,7 @@ public class IndexCreateWWWLocalQueue_p { // iterating through the list of URLs final Iterator iter = sb.crawlQueues.noticeURL.iterator(NoticedURL.STACK_TYPE_CORE); Request entry; + List removehashes = new ArrayList(); while (iter.hasNext()) { if ((entry = iter.next()) == null) continue; String value = null; @@ -129,12 +130,11 @@ public class IndexCreateWWWLocalQueue_p { default: value = null; break location; } - if (value != null) { - final Matcher matcher = compiledPattern.matcher(value); - if (matcher.find()) { - sb.crawlQueues.noticeURL.removeByURLHash(entry.url().hash()); - } - } + if (value != null && compiledPattern.matcher(value).find()) removehashes.add(entry.url().hash()); + } + Log.logInfo("IndexCreateWWWLocalQueue", "created a remove list with " + removehashes.size() + " entries for pattern '" + pattern + "'"); + for (byte[] b: removehashes) { + sb.crawlQueues.noticeURL.removeByURLHash(b); } } } catch (final PatternSyntaxException e) { diff --git a/source/de/anomic/crawler/Balancer.java b/source/de/anomic/crawler/Balancer.java index 6ff684d..ccd27a0 100644 --- a/source/de/anomic/crawler/Balancer.java +++ b/source/de/anomic/crawler/Balancer.java @@ -592,6 +592,7 @@ public class Balancer { try { return (entry == null) ? null : new Request(entry); } catch (final IOException e) { + Log.logException(e); rowIterator = null; return null; } diff --git a/source/de/anomic/crawler/NoticedURL.java b/source/de/anomic/crawler/NoticedURL.java index 13cf3db..652552d 100755 --- a/source/de/anomic/crawler/NoticedURL.java +++ b/source/de/anomic/crawler/NoticedURL.java @@ -193,6 +193,7 @@ public class NoticedURL { try {return remoteStack.remove(urlHashes) > 0;} catch (final IOException e) {} return false; } catch (RowSpaceExceededException e) { + Log.logException(e); return false; } }