Analysing n-grams in a sample text (NgramAnalyser)
<<NgramAnalyser.java>>
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.util.Arrays;
/**
* Perform n-gram analysis of a string.
*
* Analyses the frequency with which distinct n-grams, of length n,
* appear in an input string. For the purposes of all analyses of the input
* string, the final n-1 n-grams appearing in the string should be
* “filled out” to a length of n characters, by adding
* a sequence of contiguous characters from the start of the string.
* e.g. “abbc” includes “bca” and “cab” in its 3-grams
*
* @author
* @version
*/
public class NgramAnalyser
{
/** dictionary of all distinct n-grams and their frequencies */
private HashMap<String,Integer> ngram;
/** number of distinct characters in the input */
private int alphabetSize;
/** n-gram size for this object (new field) */
private int ngramSize;
/**
* Analyse the frequency with which distinct n-grams, of length n,
* appear in an input string.
* n-grams at the end of the string wrap to the front
* e.g. “abbbbc” includes “bca” and “cab” in its 3-grams
* @param int n size of n-grams to create
* @param String inp input string to be modelled
*/
public NgramAnalyser(int n, String inp)
{
//TODO replace this line with your code
}
/**
* Analyses the input text for n-grams of size 1.
*/
public NgramAnalyser(String inp)
{
this(1,inp);
}
/**
* @return int the size of the alphabet of a given input
*/
public int getAlphabetSize() {
//TODO replace this line with your code
return -1;
}
/**
* @return the total number of distinct n-grams appearing
* in the input text.
*/
public int getDistinctNgramCount() {
//TODO replace this line with your code
return -1;
}
/**
* @return Return a set containing all the distinct n-grams
* in the input string.
*/
public Set<String> getDistinctNgrams() {
//TODO replace this line with your code
return null;
}
/**
* @return the total number of n-grams appearing
* in the input text (not requiring them to be distinct)
*/
public int getNgramCount() {
//TODO replace this line with your code
return -1;
}
/** Return the frequency with which a particular n-gram appears
* in the text. If it does not appear at all, return 0.
*
* @param ngram The n-gram to get the frequency of
* @return The frequency with which the n-gram appears.
*/
public int getNgramFrequency(String ngram) {
//TODO replace this line with your code
return -1;
}
/**
* Generate a summary of the ngrams for this object.
* @return a string representation of the n-grams in the input text
* comprising the ngram size and then each ngram and its frequency
* where ngrams are presented in alphabetical order.
*/
public String toString()
{
//TODO replace this line with your code
return null;
}
}
————————————————————————————————————————–
————————————————————————————————————————–
<<ProjectTest.java>>
import static org.junit.Assert.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
* The test class ProjectTest for student test cases.
* Add all new test cases to this task.
*
* @author
* @version
*/
public class ProjectTest
{
/**
* Default constructor for test class ProjectTest
*/
public ProjectTest()
{
}
/**
* Sets up the test fixture.
*
* Called before every test case method.
*/
@Before
public void setUp()
{
}
/**
* Tears down the test fixture.
*
* Called after every test case method.
*/
@After
public void tearDown()
{
}
//TODO add new test cases from here include brief documentation
@Test(timeout=1000)
public void testSensibleToStringSize() {
assertEquals(0,1); //TODO replace with test code
}
@Test(timeout=1000)
public void testGetDistinctNgrams() {
assertEquals(0,1); //TODO replace with test code
}
}