From 7016e6c7b924ea9b1feef6127f4ec8ee9780884e Mon Sep 17 00:00:00 2001 From: Nelson Tavares de Sousa <ntd@informatik.uni-kiel.de> Date: Tue, 17 Feb 2015 16:25:57 +0100 Subject: [PATCH] modded ToLowerCase to remove punctuation; prototype works correctly, still no scaling --- src/main/java/teetime/stage/CountingMapMerger.java | 1 - src/main/java/teetime/stage/string/ToLowerCase.java | 3 ++- src/main/java/teetime/stage/string/WordCounter.java | 2 +- .../java/teetime/stage/WordCountingConfiguration.java | 7 +++++-- src/test/java/teetime/stage/WordCountingTest.java | 8 +++++++- 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/main/java/teetime/stage/CountingMapMerger.java b/src/main/java/teetime/stage/CountingMapMerger.java index 0b4ec5f1..75a9daea 100644 --- a/src/main/java/teetime/stage/CountingMapMerger.java +++ b/src/main/java/teetime/stage/CountingMapMerger.java @@ -47,7 +47,6 @@ public class CountingMapMerger<T> extends AbstractConsumerStage<CountingMap<T>> @Override public void onTerminating() throws Exception { - System.out.println("TERMINATE"); port.send(result); super.onTerminating(); } diff --git a/src/main/java/teetime/stage/string/ToLowerCase.java b/src/main/java/teetime/stage/string/ToLowerCase.java index 5f97ad5e..b3be8949 100644 --- a/src/main/java/teetime/stage/string/ToLowerCase.java +++ b/src/main/java/teetime/stage/string/ToLowerCase.java @@ -20,6 +20,7 @@ import teetime.framework.OutputPort; /** * Receives a string and passes it on to the next stage only with lower case letters. + * Punctuation and similar characters will be removed. Only [a-zA-Z ] will be passed on. * * @author Nelson Tavares de Sousa * @@ -30,7 +31,7 @@ public class ToLowerCase extends AbstractConsumerStage<String> { @Override protected void execute(final String element) { - outputPort.send(element.toLowerCase()); + outputPort.send(element.replaceAll("[^a-zA-Z ]", "").toLowerCase()); } diff --git a/src/main/java/teetime/stage/string/WordCounter.java b/src/main/java/teetime/stage/string/WordCounter.java index 271e0d55..e453a463 100644 --- a/src/main/java/teetime/stage/string/WordCounter.java +++ b/src/main/java/teetime/stage/string/WordCounter.java @@ -32,7 +32,7 @@ public class WordCounter extends CompositeStage { public WordCounter() { lastStages.add(mapCounter); - IPipeFactory pipeFact = PipeFactoryRegistry.INSTANCE.getPipeFactory(ThreadCommunication.INTER, PipeOrdering.QUEUE_BASED, false); + IPipeFactory pipeFact = PipeFactoryRegistry.INSTANCE.getPipeFactory(ThreadCommunication.INTRA, PipeOrdering.ARBITRARY, false); ToLowerCase toLowerCase = new ToLowerCase(); pipeFact.create(tokenizer.getOutputPort(), toLowerCase.getInputPort()); pipeFact.create(toLowerCase.getOutputPort(), mapCounter.getInputPort()); diff --git a/src/test/java/teetime/stage/WordCountingConfiguration.java b/src/test/java/teetime/stage/WordCountingConfiguration.java index f238ea29..de0fbc2f 100644 --- a/src/test/java/teetime/stage/WordCountingConfiguration.java +++ b/src/test/java/teetime/stage/WordCountingConfiguration.java @@ -26,8 +26,8 @@ public class WordCountingConfiguration extends AnalysisConfiguration { final Merger<CountingMap<String>> merger = new Merger<CountingMap<String>>(); // result - IPipeFactory interFact = PIPE_FACTORY_REGISTRY.getPipeFactory(ThreadCommunication.INTER, PipeOrdering.QUEUE_BASED, false); - IPipeFactory intraFact = PIPE_FACTORY_REGISTRY.getPipeFactory(ThreadCommunication.INTRA, PipeOrdering.ARBITRARY, false); + IPipeFactory intraFact = PIPE_FACTORY_REGISTRY.getPipeFactory(ThreadCommunication.INTER, PipeOrdering.QUEUE_BASED, false); + IPipeFactory interFact = PIPE_FACTORY_REGISTRY.getPipeFactory(ThreadCommunication.INTRA, PipeOrdering.ARBITRARY, false); interFact.create(init.getOutputPort(), f2b.getInputPort()); interFact.create(f2b.getOutputPort(), b2s.getInputPort()); @@ -39,6 +39,9 @@ public class WordCountingConfiguration extends AnalysisConfiguration { interFact.create(merger.getOutputPort(), result.getInputPort()); + addThreadableStage(init); + addThreadableStage(wc); + addThreadableStage(merger); } public CountingMap<String> getResult() { diff --git a/src/test/java/teetime/stage/WordCountingTest.java b/src/test/java/teetime/stage/WordCountingTest.java index 4e584d3e..20e90ab2 100644 --- a/src/test/java/teetime/stage/WordCountingTest.java +++ b/src/test/java/teetime/stage/WordCountingTest.java @@ -1,10 +1,12 @@ package teetime.stage; import java.io.File; +import java.util.Map; import org.junit.Test; import teetime.framework.Analysis; +import teetime.stage.util.CountingMap; public class WordCountingTest { @@ -13,6 +15,10 @@ public class WordCountingTest { WordCountingConfiguration wcc = new WordCountingConfiguration(new File("src/test/resources/data/output.txt")); Analysis analysis = new Analysis(wcc); analysis.start(); - System.out.println(wcc.getResult().size()); + CountingMap<String> map = wcc.getResult(); + for (Map.Entry<String, Integer> entry : map.entrySet()) + { + System.out.println(entry.getKey() + " " + entry.getValue()); + } } } -- GitLab