Skip to content

Commit

Permalink
robert-bor#49 Added documentation.
Browse files Browse the repository at this point in the history
  • Loading branch information
danbeck committed Aug 19, 2019
1 parent ceb306b commit 95416ed
Show file tree
Hide file tree
Showing 11 changed files with 180 additions and 56 deletions.
7 changes: 7 additions & 0 deletions src/main/java/org/ahocorasick/trie/Payload.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
package org.ahocorasick.trie;

/**
* Payload holds the matched keyword and some payload-data.
*
* @author Daniel Beck
*
* @param <T> The type of the wrapped payload data.
*/
public class Payload<T> implements Comparable<Payload<T>> {

private final String keyword;
Expand Down
12 changes: 12 additions & 0 deletions src/main/java/org/ahocorasick/trie/PayloadEmit.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
import org.ahocorasick.interval.Interval;
import org.ahocorasick.interval.Intervalable;

/**
* PayloadEmit contains a matched term and its associated payload data.
*
* @param <T> Type of the wrapped payload-data.
* @author Daniel Beck
*
*/
public class PayloadEmit<T> extends Interval implements Intervalable {

private final String keyword;
Expand All @@ -19,6 +26,11 @@ public String getKeyword() {
return this.keyword;
}

/**
 * Gets the payload data carried by this emit.
 *
 * @return the payload associated with this emit
 */
public T getPayload() {
    return payload;
}
Expand Down
13 changes: 13 additions & 0 deletions src/main/java/org/ahocorasick/trie/PayloadFragmentToken.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
package org.ahocorasick.trie;

/**
* PayloadFragmentToken holds a text ("the fragment").
* <p>
* It does not match a search term, so its <code>isMatch</code> method
* always returns false and <code>getEmit</code> returns null.
*
* @author Daniel Beck
*
* @param <T> The Type of the emitted payloads.
*/
public class PayloadFragmentToken<T> extends PayloadToken<T> {

public PayloadFragmentToken(String fragment) {
Expand All @@ -11,6 +21,9 @@ public boolean isMatch() {
return false;
}

/**
* Returns null.
*/
@Override
public PayloadEmit<T> getEmit() {
return null;
Expand Down
10 changes: 10 additions & 0 deletions src/main/java/org/ahocorasick/trie/PayloadMatchToken.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
package org.ahocorasick.trie;

/**
* PayloadMatchToken holds a text ("the fragment") and emits some output.
* <p>
* It matches a search term, so its <code>isMatch</code> method always
* returns true.
*
* @author Daniel Beck
*
* @param <T> The Type of the emitted payloads.
*/
public class PayloadMatchToken<T> extends PayloadToken<T> {

private final PayloadEmit<T> emit;
Expand Down
22 changes: 19 additions & 3 deletions src/main/java/org/ahocorasick/trie/PayloadState.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
* <li>failure; when a character has no matching state, the algorithm must be
* able to fall back on a state with less depth</li>
* <li>emits; when this state is passed and keywords have been matched, the
* matches must be 'emitted' so that they can be used later on.</li>
* matches and their payloads must be 'emitted' so that they can be used later
* on.</li>
* </ul>
* <p>
* <p>
Expand Down Expand Up @@ -105,19 +106,34 @@ public int getDepth() {
return this.depth;
}

public void addEmit(Payload<T> keyword) {
/**
* Adds a payload to be emitted for this state.
*
* @param payload Payload to be emitted.
*/
public void addEmit(Payload<T> payload) {
if (this.emits == null) {
this.emits = new TreeSet<>();
}
this.emits.add(keyword);
this.emits.add(payload);
}

/**
 * Registers every payload of the given collection as an emit of this state.
 *
 * @param emits Collection of payloads to be emitted.
 */
public void addEmit(Collection<Payload<T>> emits) {
    final java.util.Iterator<Payload<T>> cursor = emits.iterator();
    while (cursor.hasNext()) {
        addEmit(cursor.next());
    }
}

/**
 * Provides the payloads that have been registered as emits of this state.
 *
 * @return the registered payloads, or an empty list if none were added.
 */
public Collection<Payload<T>> emit() {
    if (this.emits != null) {
        return this.emits;
    }
    return Collections.<Payload<T>>emptyList();
}
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/org/ahocorasick/trie/PayloadToken.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
package org.ahocorasick.trie;

/**
* PayloadToken holds a text ("the fragment") and emits some output. If
* <code>isMatch</code> returns true, the token matched a search term.
*
* @author Daniel Beck
*
* @param <T> The Type of the emitted payloads.
*/
public abstract class PayloadToken<T> {
private String fragment;

Expand All @@ -11,6 +19,9 @@ public String getFragment() {
return this.fragment;
}

/**
* Returns true if a search term matched.
*/
public abstract boolean isMatch();

public abstract PayloadEmit<T> getEmit();
Expand Down
66 changes: 57 additions & 9 deletions src/main/java/org/ahocorasick/trie/PayloadTrie.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@
import org.ahocorasick.util.ListElementRemoval.RemoveElementPredicate;

/**
* Based on the Aho-Corasick white paper, Bell technologies:
* http://cr.yp.to/bib/1975/aho.pdf
* A trie implementation, based on the Aho-Corasick white paper, Bell
* technologies: http://cr.yp.to/bib/1975/aho.pdf
* <p>
*
* @author Robert Bor
* @param <U>
* The payload trie adds the possibility to specify emitted payloads for each
* added keyword.
*
* @author Daniel Beck
* @param <T> The type of the supplied payload.
*/
public class PayloadTrie<T> {

Expand All @@ -38,6 +42,7 @@ protected PayloadTrie(final TrieConfig trieConfig) {
* Used by the builder to add a text search keyword with an emit payload.
*
* @param keyword The search term to add to the list of search terms.
* @param emit the payload to emit for this search term.
* @throws NullPointerException if the keyword is null.
*/
private void addKeyword(String keyword, T emit) {
Expand Down Expand Up @@ -74,6 +79,11 @@ private PayloadState<T> addState(final String keyword) {
return getRootState().addState(keyword);
}

/**
* Tokenizes the specified text and returns the emitted outputs.
*
* @param text The text to tokenize.
*/
public Collection<PayloadToken<T>> tokenize(final String text) {
final Collection<PayloadToken<T>> tokens = new ArrayList<>();
final Collection<PayloadEmit<T>> collectedEmits = parseText(text);
Expand Down Expand Up @@ -104,10 +114,24 @@ private PayloadToken<T> createMatch(PayloadEmit<T> emit, String text) {
return new PayloadMatchToken<T>(text.substring(emit.getStart(), emit.getEnd() + 1), emit);
}

/**
 * Tokenizes the given text using a default emit handler and returns the
 * emitted outputs.
 *
 * @param text The character sequence to tokenize.
 * @return A collection of emits.
 */
public Collection<PayloadEmit<T>> parseText(final CharSequence text) {
    final DefaultPayloadEmitHandler<T> defaultHandler = new DefaultPayloadEmitHandler<T>();
    return parseText(text, defaultHandler);
}

/**
* Tokenizes the specified text by using a custom EmitHandler and returns the
* emitted outputs.
*
* @param text The character sequence to tokenize.
* @param emitHandler The emit handler that will be used to parse the text.
* @return A collection of emits.
*/
@SuppressWarnings("unchecked")
public Collection<PayloadEmit<T>> parseText(final CharSequence text, final StatefulPayloadEmitHandler<T> emitHandler) {
parseText(text, (PayloadEmitHandler<T>) emitHandler);
Expand All @@ -130,10 +154,27 @@ public Collection<PayloadEmit<T>> parseText(final CharSequence text, final State
return collectedEmits;
}

/**
 * Checks whether the text contains at least one of the search terms.
 *
 * @param text Specified text.
 * @return true if the text contains one of the search terms. Else, returns
 *         false.
 */
public boolean containsMatch(final CharSequence text) {
    if (firstMatch(text) == null) {
        return false;
    }
    return true;
}

/**
* Tokenizes the specified text by using a custom EmitHandler. This method
* returns nothing; the supplied handler is used to process the parsed text.
*
* @param text The character sequence to tokenize.
* @param emitHandler The emit handler that will be used to parse the text.
*/

public void parseText(final CharSequence text, final PayloadEmitHandler<T> emitHandler) {
PayloadState<T> currentState = getRootState();

Expand Down Expand Up @@ -314,6 +355,11 @@ public static <T> PayloadTrieBuilder<T> builder() {
return new PayloadTrieBuilder<T>();
}

/**
* Builder class to create a PayloadTrie instance.
*
* @param <T> The type of the emitted payload.
*/
public static class PayloadTrieBuilder<T> {

private final TrieConfig trieConfig = new TrieConfig();
Expand Down Expand Up @@ -350,7 +396,8 @@ public PayloadTrieBuilder<T> ignoreOverlaps() {
}

/**
* Adds a keyword to the Trie's list of text search keywords.
* Adds a keyword to the Trie's list of text search keywords. No Payload is
* supplied.
*
* @param keyword The keyword to add to the list.
* @return This builder.
Expand All @@ -362,7 +409,7 @@ public PayloadTrieBuilder<T> addKeyword(final String keyword) {
}

/**
* Adds a keyword to the Trie's list of text search keywords.
* Adds a keyword and a payload to the Trie's list of text search keywords.
*
* @param keyword The keyword to add to the list.
* @param payload The payload to associate with the keyword.
* @return This builder.
Expand All @@ -374,7 +421,8 @@ public PayloadTrieBuilder<T> addKeyword(final String keyword, final T payload) {
}

/**
* Adds a list of keywords to the Trie's list of text search keywords.
* Adds a list of keywords and payloads to the Trie's list of text search
* keywords.
*
* @param keywords The keywords to add to the list.
* @return This builder.
Expand Down Expand Up @@ -419,9 +467,9 @@ public PayloadTrieBuilder<T> stopOnHit() {
}

/**
* Configure the Trie based on the builder settings.
* Configure the PayloadTrie based on the builder settings.
*
* @return The configured Trie.
* @return The configured PayloadTrie.
*/
public PayloadTrie<T> build() {
this.trie.constructFailureStates();
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/ahocorasick/trie/Trie.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

import org.ahocorasick.trie.PayloadTrie.PayloadTrieBuilder;
import org.ahocorasick.trie.handler.EmitHandler;
import org.ahocorasick.trie.handler.StatefulPayloadEmitDelegateHandler;
import org.ahocorasick.trie.handler.PayloadEmitDelegateHandler;
import org.ahocorasick.trie.handler.PayloadEmitDelegateHandlerStateless;
import org.ahocorasick.trie.handler.StatefulEmitHandler;

/**
Expand Down Expand Up @@ -56,7 +56,7 @@ public Collection<Emit> parseText(final CharSequence text) {
@SuppressWarnings("unchecked")
public Collection<Emit> parseText(final CharSequence text, final StatefulEmitHandler emitHandler) {
Collection<PayloadEmit<String>> parsedText = this.payloadTrie.parseText(text,
new PayloadEmitDelegateHandler(emitHandler));
new StatefulPayloadEmitDelegateHandler(emitHandler));
return asEmits(parsedText);
}

Expand All @@ -65,7 +65,7 @@ public boolean containsMatch(final CharSequence text) {
}

public void parseText(final CharSequence text, final EmitHandler emitHandler) {
this.payloadTrie.parseText(text, new PayloadEmitDelegateHandlerStateless(emitHandler));
this.payloadTrie.parseText(text, new PayloadEmitDelegateHandler(emitHandler));
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,38 +1,24 @@
package org.ahocorasick.trie.handler;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.ahocorasick.trie.Emit;
import org.ahocorasick.trie.PayloadEmit;

public class PayloadEmitDelegateHandler implements StatefulPayloadEmitHandler<String> {
/**
* Convenience wrapper class that delegates every method to an EmitHandler.
*/
public class PayloadEmitDelegateHandler implements PayloadEmitHandler<String> {

private StatefulEmitHandler handler;
private EmitHandler handler;

public PayloadEmitDelegateHandler(StatefulEmitHandler handler) {
public PayloadEmitDelegateHandler(EmitHandler handler) {
this.handler = handler;

}

private static List<PayloadEmit<String>> asEmits(Collection<Emit> emits) {
List<PayloadEmit<String>> result = new ArrayList<>();
for (Emit emit : emits) {
result.add(new PayloadEmit<String>(emit.getStart(), emit.getEnd(), emit.getKeyword(), null));
}
return result;
}

@Override
public boolean emit(PayloadEmit<String> emit) {
Emit newEmit = new Emit(emit.getStart(), emit.getEnd(), emit.getKeyword());
return handler.emit(newEmit);
}

@Override
public List<PayloadEmit<String>> getEmits() {
List<Emit> emits = this.handler.getEmits();
return asEmits(emits);
}
}

This file was deleted.

Loading

0 comments on commit 95416ed

Please sign in to comment.