Concordance Algorithm
I was given this test in a job interview (I won't name the company). Here is how I tackled the problem.
"Given an arbitrary text document written in English, write a program that
will generate a concordance, i.e. an alphabetical list of all word
occurrences, labeled with word frequencies. Bonus: label each word with the
sentence numbers in which each occurrence appeared."
For example, this is a concordance of the above text:
a. a {2:1,1}
b. all {1:1}
c. alphabetical {1:1}
d. an {2:1,1}
e. appeared {1:2}
f. arbitrary {1:1}
g. bonus {1:2}
h. concordance {1:1}
i. document {1:1}
j. each {1:2}
k. english {1:1}
l. frequencies {1:1}
m. generate {1:1}
n. given {1:1}
o. i.e. {1:1}
p. in {2:1,2}
q. it {1:2}
r. label {1:2}
s. labeled {1:1}
t. list {1:1}
u. numbers {1:2}
v. occurrences {1:1}
w. of {1:1}
x. program {1:1}
y. sentence {1:2}
z. text {1:1}
aa. that {1:1}
bb. the {1:2}
cc. which {1:2}
dd. will {1:1}
ee. with {2:1,2}
ff. word {3:1,1,2}
gg. write {1:1}
hh. written {1:1}
Here is my solution to the above.
ConcordanceManager
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace App.Test
{
    /// <summary>
    /// Builds a concordance for a block of text: every distinct word, how many
    /// times it occurs, and the 1-based number of the sentence of each occurrence.
    /// </summary>
    public class ConcordanceManager
    {
        /// <summary>Number of letters used when generating list labels (a..z).</summary>
        private const int AlphabetSize = 26;

        /// <summary>Total width of the "word + concordance" column written by <see cref="Render"/>.</summary>
        private const int ConcordanceColumnWidth = 20;

        private readonly string _textInput;
        private readonly List<Sentence> _sentences;
        private readonly List<ConcordanceItem> _concordanceList;

        /// <summary>
        /// Gets a new list holding the concordance entries sorted by word.
        /// The list is empty until <see cref="Generate"/> has been called.
        /// </summary>
        public List<ConcordanceItem> ConcordanceList
        {
            get
            {
                return _concordanceList
                    .OrderBy(x => x.Text, StringComparer.Ordinal)
                    .ToList();
            }
        }

        /// <summary>
        /// Creates a manager for the given text.
        /// </summary>
        /// <param name="inputText">Text to analyse; null is treated as empty text.</param>
        public ConcordanceManager(string inputText)
        {
            _textInput = inputText ?? string.Empty;
            _sentences = new List<Sentence>();
            _concordanceList = new List<ConcordanceItem>();
        }

        /// <summary>
        /// Splits the input text into sentences. A sentence ends at a period that is
        /// followed by a single space and an upper-case letter, or at the end of the text.
        /// </summary>
        private void GetSentences()
        {
            _sentences.Clear();

            if (string.IsNullOrWhiteSpace(_textInput))
            {
                return;
            }

            int start = 0;
            int last = _textInput.Length - 1;

            for (int i = 0; i <= last; i++)
            {
                bool atEndOfText = i == last;

                // Short-circuit operators keep the i + 2 look-ahead inside the string.
                bool atBoundary = !atEndOfText
                    && _textInput[i] == '.'
                    && i + 2 <= last
                    && _textInput[i + 1] == ' '
                    && char.IsUpper(_textInput[i + 2]);

                if (atEndOfText || atBoundary)
                {
                    string content = _textInput.Substring(start, i - start + 1).Trim();
                    if (content.Length > 0)
                    {
                        _sentences.Add(new Sentence(content));
                    }

                    // The next sentence begins right after the terminating character.
                    start = i + 1;
                }
            }
        }

        /// <summary>
        /// Rebuilds the concordance from the input text. Calling it more than once
        /// yields the same result because previous entries are discarded first.
        /// </summary>
        public void Generate()
        {
            _concordanceList.Clear();
            this.GetSentences();

            // Index by word so each occurrence is a single lookup instead of a list scan.
            var itemsByWord = new Dictionary<string, ConcordanceItem>(StringComparer.Ordinal);

            for (int s = 0; s < _sentences.Count; s++)
            {
                string[] words = _sentences[s].Words;
                if (words == null)
                {
                    // A sentence without words contributes nothing.
                    continue;
                }

                string sentenceNumber = (s + 1).ToString();

                foreach (string raw in words)
                {
                    string word = NormalizeWord(raw);
                    if (word.Length == 0)
                    {
                        continue;
                    }

                    ConcordanceItem item;
                    if (itemsByWord.TryGetValue(word, out item))
                    {
                        item.Increment();
                        item.UpdateCondordance(sentenceNumber);
                    }
                    else
                    {
                        item = new ConcordanceItem(word, 1, sentenceNumber);
                        itemsByWord.Add(word, item);
                        _concordanceList.Add(item);
                    }
                }
            }
        }

        /// <summary>
        /// Lower-cases a token and strips leading/trailing characters that are not
        /// letters or digits. A trailing period is kept when the remaining word itself
        /// contains a period (heuristic for abbreviations such as "i.e.").
        /// </summary>
        /// <param name="raw">Token as produced by splitting a sentence on whitespace.</param>
        /// <returns>The normalized word, or an empty string when nothing is left.</returns>
        private static string NormalizeWord(string raw)
        {
            if (string.IsNullOrEmpty(raw))
            {
                return string.Empty;
            }

            int first = 0;
            int last = raw.Length - 1;

            while (first <= last && !char.IsLetterOrDigit(raw[first]))
            {
                first++;
            }

            while (last >= first && !char.IsLetterOrDigit(raw[last]))
            {
                last--;
            }

            if (first > last)
            {
                return string.Empty;
            }

            string core = raw.Substring(first, last - first + 1);

            bool looksLikeAbbreviation = core.IndexOf('.') >= 0
                && last + 1 < raw.Length
                && raw[last + 1] == '.';

            if (looksLikeAbbreviation)
            {
                core += ".";
            }

            return core.ToLowerInvariant();
        }

        /// <summary>
        /// Produces the list label for a zero-based position: a..z, then aa..zz,
        /// then aaa..zzz, and so on.
        /// </summary>
        private static string BuildLabel(int position)
        {
            char letter = (char)('a' + (position % AlphabetSize));
            return new string(letter, (position / AlphabetSize) + 1);
        }

        /// <summary>
        /// Writes the sorted concordance to the console, one labelled entry per line,
        /// then waits for a line of input before returning.
        /// </summary>
        public void Render()
        {
            int position = 0;

            foreach (var item in this.ConcordanceList)
            {
                if (string.IsNullOrEmpty(item.Text) || item.Text == ".")
                {
                    continue;
                }

                // PadLeft throws on a negative width, so clamp for words longer than the column.
                int padWidth = Math.Max(0, ConcordanceColumnWidth - item.Text.Length);

                Console.WriteLine(string.Format(
                    "{0}. {1}{2}",
                    BuildLabel(position),
                    item.Text,
                    item.Concordance.PadLeft(padWidth, ' ')));

                position++;
            }

            // Blocks until Enter is pressed; the original did the same after printing.
            Console.ReadLine();
        }
    }
}
ConcordanceItem
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace App.Test
{
    /// <summary>
    /// One concordance entry: a word, its occurrence count, and a comma-separated
    /// list of sentence numbers.
    /// </summary>
    public class ConcordanceItem
    {
        private readonly string _text;
        private string _concordance;
        private int _noOfOccurence;

        /// <summary>
        /// Creates an entry.
        /// </summary>
        /// <param name="text">The word.</param>
        /// <param name="count">Initial occurrence count.</param>
        /// <param name="concordance">Initial comma-separated sentence numbers.</param>
        public ConcordanceItem(string text, int count, string concordance)
        {
            _noOfOccurence = count;
            _concordance = concordance;
            _text = text;
        }

        /// <summary>
        /// Gets the entry formatted as "{count:sentence,sentence,...}"; trailing
        /// commas in the stored sentence list are not included.
        /// </summary>
        public string Concordance
        {
            get
            {
                string sentenceList = _concordance.TrimEnd(',');
                return "{" + _noOfOccurence + ":" + sentenceList + "}";
            }
        }

        /// <summary>Gets the number of occurrences recorded so far.</summary>
        public int NoOfOccurence
        {
            get { return _noOfOccurence; }
        }

        /// <summary>Gets the word this entry describes.</summary>
        public string Text
        {
            get { return _text; }
        }

        /// <summary>Adds one to the occurrence count.</summary>
        public void Increment()
        {
            _noOfOccurence++;
        }

        /// <summary>
        /// Appends <paramref name="concordance"/> to the stored sentence list,
        /// separated by a comma.
        /// </summary>
        /// <param name="concordance">Sentence number(s) to append.</param>
        public void UpdateCondordance(string concordance)
        {
            _concordance = _concordance + "," + concordance;
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace App.Test
{
    /// <summary>
    /// A single sentence together with its whitespace-separated words.
    /// </summary>
    public class Sentence
    {
        private readonly string _content;
        private readonly string[] _words;

        /// <summary>
        /// Creates a sentence and splits it into words on whitespace. Empty tokens
        /// and stand-alone "." tokens are discarded.
        /// </summary>
        /// <param name="sentence">Sentence text; null is treated as an empty sentence.</param>
        public Sentence(string sentence)
        {
            _content = sentence;

            // A null separator array splits on any whitespace character.
            _words = (sentence ?? string.Empty)
                .Split((char[])null, StringSplitOptions.RemoveEmptyEntries)
                .Where(x => !x.Equals(".", StringComparison.Ordinal))
                .ToArray();
        }

        /// <summary>Gets the sentence text, or an empty string when none was supplied.</summary>
        public string Content
        {
            get { return !string.IsNullOrEmpty(_content) ? _content : string.Empty; }
        }

        /// <summary>
        /// Gets the words of the sentence. Never null; the array is empty when the
        /// sentence contains no words.
        /// </summary>
        public string[] Words
        {
            get { return _words; }
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace App.Test
{
    /// <summary>
    /// Console entry point: builds and prints the concordance of the sample text.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Replaces the commas in the sample text with spaces, then generates and
        /// renders its concordance.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const string sample = "Given an arbitrary text document written in English, write a program that will generate a concordance, i.e. an alphabetical list of all word occurrences, labeled with word frequencies. Bonus: label each word with the sentence numbers in which each occurrence appeared.";

            // Commas are turned into spaces before the text reaches the manager.
            string withoutCommas = sample.Replace(',', ' ');

            var manager = new ConcordanceManager(withoutCommas);
            manager.Generate();
            manager.Render();
        }
    }
}
Output:
Happy Coding!
Comments