Skip to content
This repository has been archived by the owner on Jun 2, 2019. It is now read-only.

Commit

Permalink
Reimplement UrlStore
Browse files: browse the repository at this point in the history
  • Loading branch information
dethi committed Dec 9, 2017
1 parent 129f659 commit e4f0271
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 51 deletions.
4 changes: 2 additions & 2 deletions src/main/java/com/epita/guereza/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ public static void main(String[] args) {
final Function<Scope, CrawlerApp> newCrawlerApp = (s) -> new CrawlerApp(s.instanceOf(EventBusClient.class), s.instanceOf(Crawler.class));
final Function<Scope, IndexerApp> newIndexerApp = (s) -> new IndexerApp(s.instanceOf(EventBusClient.class), s.instanceOf(Indexer.class),
s.instanceOf(Crawler.class));
final Function<Scope, Repo> newRepo = (s) -> new UrlStore(s.instanceOf(EventBusClient.class));
final Function<Scope, UrlStore> newUrlStore = (s) -> new UrlStore(s.instanceOf(EventBusClient.class));

new Scope()
.register(new Singleton<>(Crawler.class, crawler))
.register(new Singleton<>(Indexer.class, indexer))
.register(new LazySingleton<>(Repo.class, newRepo))
.register(new LazySingleton<>(UrlStore.class, newUrlStore))
.register(new Prototype<>(EventBusClient.class, newEventBus))
.register(new Prototype<>(CrawlerApp.class, newCrawlerApp))
.register(new Prototype<>(IndexerApp.class, newIndexerApp))
Expand Down
17 changes: 0 additions & 17 deletions src/main/java/com/epita/guereza/Repo.java

This file was deleted.

52 changes: 20 additions & 32 deletions src/main/java/com/epita/guereza/UrlStore.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,46 +9,21 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.LinkedHashSet;
import java.util.Set;
import java.util.*;

public class UrlStore implements Repo, Reducer {
public class UrlStore implements Reducer {
// Keyed to UrlStore (not CrawlerService) so log lines are attributed to the class that emits them.
private static final Logger LOGGER = LoggerFactory.getLogger(UrlStore.class);

private final EventBusClient eventBus;

// URLs already returned by nextUrl(); the public store() does not queue these again.
private Set<String> urlDone = new LinkedHashSet<>();
// URLs waiting to be returned by nextUrl(); LinkedHashSet keeps them in insertion order.
private Set<String> urlTodo = new LinkedHashSet<>();
// Every URL accepted by the private store() so far; used to reject duplicates.
private Set<String> allUrls = new HashSet<>();
// URLs not yet handed out by crawlerRequestUrl (consumed there with poll()).
private Queue<String> crawlerTodo = new LinkedList<>();
// URLs not yet handed out by indexerRequestUrl (consumed there with poll()).
private Queue<String> indexerTodo = new LinkedList<>();

public UrlStore(final EventBusClient eventBus) {
this.eventBus = eventBus;
}

/**
 * Adds each usable URL to urlTodo unless it is already present in urlDone.
 *
 * @param urls candidate URLs; null and empty entries are skipped
 */
@Override
public void store(String[] urls) {
    for (final String candidate : urls) {
        final boolean usable = candidate != null && !candidate.isEmpty();
        if (usable && !urlDone.contains(candidate)) {
            urlTodo.add(candidate);
        }
    }
}

/**
 * Removes the first URL (in iteration order) from urlTodo, records it in urlDone and returns it.
 *
 * @return the URL taken from urlTodo, or null when urlTodo is empty
 */
@Override
public String nextUrl() {
    if (urlTodo.isEmpty()) {
        LOGGER.warn("No more url to analyse.");
        return null;
    }

    // Move the head of the pending set over to the done set before handing it out.
    final String picked = urlTodo.iterator().next();
    urlTodo.remove(picked);
    urlDone.add(picked);
    LOGGER.info("Repo still contains {} links", urlTodo.size());
    return picked;
}

@SuppressWarnings("unchecked")
@Override
public void reduce(final Event<?> event) {
Expand All @@ -65,22 +40,35 @@ public void reduce(final Event<?> event) {
}
}

/**
 * Records each previously unseen URL in allUrls and queues it in both crawlerTodo and indexerTodo.
 *
 * @param urls candidate URLs; a null array and null or empty entries are ignored
 */
private void store(String[] urls) {
    // The array comes from an event payload; tolerate a missing payload instead of throwing.
    if (urls == null) {
        return;
    }
    for (String url : urls) {
        if (url == null || url.isEmpty()) {
            continue;
        }
        // Set.add returns false for a duplicate, so one call does both the membership test and the insert.
        if (allUrls.add(url)) {
            crawlerTodo.add(url);
            indexerTodo.add(url);
        }
    }
}

/**
 * Passes the event's URL array to store() and logs that URLs were added.
 *
 * @param event event whose obj field carries the URLs
 */
private void addUrls(Event<String[]> event) {
    final String[] incoming = event.obj;
    store(incoming);
    LOGGER.info("added URLs to the repo");
}

// Publishes an EventMessage built from event.obj and a URL on the event bus
// (presumably the reply to a crawler's URL request, with event.obj naming the
// reply channel — confirm against EventMessage).
// A JsonProcessingException raised while publishing is logged and not rethrown.
// NOTE(review): two publish calls appear below, one using nextUrl() and one using
// crawlerTodo.poll(); this looks like the removed and the added line of the diff
// shown together — confirm that only the crawlerTodo.poll() call is intended.
// NOTE(review): Queue.poll() returns null on an empty queue, so a message with a
// null URL can be published — verify the receiver handles that.
private void crawlerRequestUrl(Event<String> event) {
try {
eventBus.publish(new EventMessage(event.obj, nextUrl()));
eventBus.publish(new EventMessage(event.obj, crawlerTodo.poll()));
} catch (JsonProcessingException e) {
LOGGER.error("cannot serialize: {}", e.getMessage());
}
}

private void indexerRequestUrl(Event<String> event) {
try {
eventBus.publish(new EventMessage(event.obj, nextUrl()));
eventBus.publish(new EventMessage(event.obj, indexerTodo.poll()));
} catch (JsonProcessingException e) {
LOGGER.error("cannot serialize: {}", e.getMessage());
}
Expand Down

0 comments on commit e4f0271

Please sign in to comment.