diff --git a/src/main/java/teetime/stage/string/ToLowerCase.java b/src/main/java/teetime/stage/string/ToLowerCase.java index ee32c2bda2396cd81ece31fb68ce350387e02c13..78b4dc72c27beb30e46d531fe5168c13d0045694 100644 --- a/src/main/java/teetime/stage/string/ToLowerCase.java +++ b/src/main/java/teetime/stage/string/ToLowerCase.java @@ -20,12 +20,10 @@ import teetime.framework.OutputPort; /** * Receives a string and passes it on to the next stage only with lower case letters. - * Punctuation and similar characters will be removed. Only [a-zA-Z ] will be passed on. * * @since 1.1 * * @author Nelson Tavares de Sousa - * */ public final class ToLowerCase extends AbstractConsumerStage<String> { @@ -33,12 +31,13 @@ public final class ToLowerCase extends AbstractConsumerStage<String> { @Override protected void execute(final String element) { - outputPort.send(element.replaceAll("[^a-zA-Z ]", "").toLowerCase()); + /* Locale.ROOT keeps the result independent of the default locale; e.g. under a Turkish locale 'I' would become dotless i (U+0131). */ + this.outputPort.send(element.toLowerCase(java.util.Locale.ROOT)); } public OutputPort<String> getOutputPort() { - return outputPort; + return this.outputPort; } } diff --git a/src/main/java/teetime/stage/string/WordCounter.java b/src/main/java/teetime/stage/string/WordCounter.java index 9f26beee3e9093c3ec35d954abd3e29e896f5c49..4075e4d9cac0f2bd147ecea0569174ae61b921ee 100644 --- a/src/main/java/teetime/stage/string/WordCounter.java +++ b/src/main/java/teetime/stage/string/WordCounter.java @@ -43,29 +43,32 @@ public final class WordCounter extends CompositeStage { // The connection of the different stages is realized within the construction of a instance of this class. 
public WordCounter() { - lastStages.add(mapCounter); + this.lastStages.add(this.mapCounter); - ToLowerCase toLowerCase = new ToLowerCase(); - connectStages(tokenizer.getOutputPort(), toLowerCase.getInputPort()); - connectStages(toLowerCase.getOutputPort(), mapCounter.getInputPort()); + final ToLowerCase toLowerCase = new ToLowerCase(); + final WordcharacterFilter wordcharacterFilter = new WordcharacterFilter(); + /* Strip non-letters before lower-casing, so that case mapping only ever sees [a-zA-Z ] input. */ + connectStages(this.tokenizer.getOutputPort(), wordcharacterFilter.getInputPort()); + connectStages(wordcharacterFilter.getOutputPort(), toLowerCase.getInputPort()); + connectStages(toLowerCase.getOutputPort(), this.mapCounter.getInputPort()); } @Override protected Stage getFirstStage() { - return tokenizer; + return this.tokenizer; } @Override protected Collection<? extends Stage> getLastStages() { - return lastStages; + return this.lastStages; } public InputPort<String> getInputPort() { - return tokenizer.getInputPort(); + return this.tokenizer.getInputPort(); } public OutputPort<CountingMap<String>> getOutputPort() { - return mapCounter.getOutputPort(); + return this.mapCounter.getOutputPort(); } } diff --git a/src/main/java/teetime/stage/string/WordcharacterFilter.java b/src/main/java/teetime/stage/string/WordcharacterFilter.java new file mode 100644 index 0000000000000000000000000000000000000000..876271e28e236f31239ee13479ea80dc9d9d037c --- /dev/null +++ b/src/main/java/teetime/stage/string/WordcharacterFilter.java @@ -0,0 +1,43 @@ +/** + * Copyright (C) 2015 TeeTime (http://teetime.sourceforge.net) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package teetime.stage.string; + +import java.util.regex.Pattern; +import teetime.framework.AbstractConsumerStage; +import teetime.framework.OutputPort; + +/** + * Receives a string, removes every character other than ASCII letters and the space character (i.e. everything not matched by {@code [a-zA-Z ]}, such as punctuation and digits) and passes the result on to the next stage. + * + * @since 1.1 + * + * @author Nelson Tavares de Sousa + */ +public final class WordcharacterFilter extends AbstractConsumerStage<String> { + + /** Compiled once per class; {@code String.replaceAll} would compile the same expression again for every element. */ + private static final Pattern NON_WORD_CHARACTERS = Pattern.compile("[^a-zA-Z ]"); + private final OutputPort<String> outputPort = this.createOutputPort(); + + @Override + protected void execute(final String element) { + this.outputPort.send(NON_WORD_CHARACTERS.matcher(element).replaceAll("")); + } + + public OutputPort<String> getOutputPort() { + return this.outputPort; + } +}