Concordance Algorithm


I was once given this test in a job interview (I won't name the company). Here is how I tackled the problem.

"Given an arbitrary text document written in English, write a program that
will generate a concordance, i.e. an alphabetical list of all word
occurrences, labeled with word frequencies. Bonus: label each word with the
sentence numbers in which each occurrence appeared."
 
For example, this is a concordance of the above text:
 
a.   a                  {2:1,1}
b.   all                {1:1}
c.   alphabetical       {1:1}
d.   an                 {2:1,1}
e.   appeared           {1:2}
f.   arbitrary          {1:1}
g.   bonus              {1:2}
h.   concordance        {1:1}
i.   document           {1:1}
j.   each               {1:2}
k.   english            {1:1}
l.   frequencies        {1:1}
m.   generate           {1:1}
n.   given              {1:1}
o.   i.e.               {1:1}
p.   in                 {2:1,2}
q.   it                 {1:2}
r.   label              {1:2}
s.   labeled            {1:1}
t.   list               {1:1}
u.   numbers            {1:2}
v.   occurrences        {1:1}
w.   of                 {1:1}
x.   program            {1:1}
y.   sentence           {1:2}
z.   text               {1:1}
aa.  that               {1:1}
bb.  the                {1:2}
cc.  which              {1:2}
dd.  will               {1:1}
ee.  with               {2:1,2}
ff.  word               {3:1,1,2}
gg.  write              {1:1}
hh.  written            {1:1}




Here is my solution to the above.

ConcordanceManager
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace App.Test
{
    /// <summary>
    /// Builds a concordance for a block of English text: every distinct word,
    /// how many times it occurs, and the (1-based) number of the sentence in
    /// which each occurrence appears.
    /// </summary>
    /// <remarks>
    /// Typical use is <c>new ConcordanceManager(text)</c>, then
    /// <see cref="Generate"/>, then <see cref="Render"/> or
    /// <see cref="ConcordanceList"/>.
    /// </remarks>
    public class ConcordanceManager
    {
        // Width of the "a." / "aa." label column written by Render.
        private const int LabelColumnWidth = 5;

        // Minimum width of the word column written by Render; one separator
        // space is always appended after it.
        private const int TextColumnWidth = 18;

        // Number of letters available for entry labels before they repeat.
        private const int AlphabetSize = 26;

        private string _textInput;
        private List<Sentence> _sentences;
        private List<ConcordanceItem> _concordanceList;

        /// <summary>
        /// Gets a new list containing the concordance entries sorted by word.
        /// The list is empty until <see cref="Generate"/> has been called.
        /// </summary>
        public List<ConcordanceItem> ConcordanceList
        {
            get
            {
                return _concordanceList.OrderBy(x => x.Text).ToList();
            }
        }

        /// <summary>
        /// Creates a manager for the given text.
        /// </summary>
        /// <param name="inputText">Text to analyse; null is treated as empty.</param>
        public ConcordanceManager(string inputText)
        {
            _textInput = inputText ?? string.Empty;
            _sentences = new List<Sentence>();
            _concordanceList = new List<ConcordanceItem>();
        }

        /// <summary>
        /// Splits the input text into sentences and stores them in order.
        /// </summary>
        /// <remarks>
        /// A sentence ends at '.', '!' or '?' when that character is followed by
        /// whitespace and then an upper-case letter, or by nothing but
        /// whitespace up to the end of the text. This keeps abbreviations such
        /// as "i.e. an" inside one sentence, but it is a heuristic: an
        /// abbreviation followed by a capitalised word (e.g. "Mr. Smith") is
        /// still treated as a boundary.
        /// </remarks>
        private void GetSentences()
        {
            int length = _textInput.Length;
            int start = 0;
            int i = 0;

            while (i < length)
            {
                char current = _textInput[i];
                if (current != '.' && current != '!' && current != '?')
                {
                    i++;
                    continue;
                }

                // Look past any whitespace that follows the terminator.
                int next = i + 1;
                while (next < length && char.IsWhiteSpace(_textInput[next]))
                {
                    next++;
                }

                bool followedByGap = next > i + 1;
                if (next == length || (followedByGap && char.IsUpper(_textInput[next])))
                {
                    AddSentence(start, i + 1 - start);

                    // The next sentence begins at its first non-whitespace
                    // character (an absolute index, not a length).
                    start = next;
                }

                // 'next' is always greater than 'i', so the scan always advances.
                i = next;
            }

            // Text that does not end with a terminator still forms a sentence.
            if (start < length)
            {
                AddSentence(start, length - start);
            }
        }

        /// <summary>
        /// Stores the given slice of the input as a sentence unless it is blank.
        /// </summary>
        private void AddSentence(int start, int length)
        {
            string content = _textInput.Substring(start, length);
            if (content.Trim().Length > 0)
            {
                _sentences.Add(new Sentence(content));
            }
        }

        /// <summary>
        /// Reduces a raw token to the form used as a concordance key.
        /// </summary>
        /// <remarks>
        /// Leading and trailing characters that are not letters or digits are
        /// removed and the result is lower-cased, so "Bonus:" and "bonus" are
        /// the same word. A closing period is kept when the word already
        /// contains a period (a dotted abbreviation such as "i.e.").
        /// </remarks>
        /// <returns>The normalised word, or an empty string if nothing is left.</returns>
        private static string NormalizeWord(string raw)
        {
            if (string.IsNullOrEmpty(raw))
            {
                return string.Empty;
            }

            int first = 0;
            int last = raw.Length - 1;

            while (first <= last && !char.IsLetterOrDigit(raw[first]))
            {
                first++;
            }

            while (last >= first && !char.IsLetterOrDigit(raw[last]))
            {
                last--;
            }

            if (first > last)
            {
                return string.Empty;
            }

            string core = raw.Substring(first, last - first + 1);

            bool closingPeriod = last + 1 < raw.Length && raw[last + 1] == '.';
            if (closingPeriod && core.IndexOf('.') >= 0)
            {
                core += ".";
            }

            return core.ToLowerInvariant();
        }

        /// <summary>
        /// Builds the concordance from the input text. Any result from an
        /// earlier call is discarded first, so calling this more than once does
        /// not double-count.
        /// </summary>
        public void Generate()
        {
            _sentences.Clear();
            _concordanceList.Clear();
            this.GetSentences();

            // Index of entries by word so each occurrence is a single lookup.
            var entriesByWord = new Dictionary<string, ConcordanceItem>(StringComparer.Ordinal);
            int sentenceNumber = 0;

            foreach (Sentence sentence in _sentences)
            {
                sentenceNumber++;

                // Words may be null or contain empty tokens; both are skipped.
                string[] words = sentence.Words;
                if (words == null)
                {
                    continue;
                }

                foreach (string raw in words)
                {
                    string word = NormalizeWord(raw);
                    if (word.Length == 0)
                    {
                        continue;
                    }

                    // One sentence number is recorded per occurrence.
                    string location = sentenceNumber.ToString();

                    ConcordanceItem entry;
                    if (entriesByWord.TryGetValue(word, out entry))
                    {
                        entry.Increment();
                        entry.UpdateCondordance(location);
                    }
                    else
                    {
                        entry = new ConcordanceItem(word, 1, location);
                        entriesByWord.Add(word, entry);
                        _concordanceList.Add(entry);
                    }
                }
            }
        }

        /// <summary>
        /// Returns the label for the entry at the given zero-based position:
        /// a..z, then aa..zz, then aaa..zzz, and so on.
        /// </summary>
        private static string BuildLabel(int position)
        {
            char letter = (char)('a' + (position % AlphabetSize));
            return new string(letter, (position / AlphabetSize) + 1);
        }

        /// <summary>
        /// Writes the sorted concordance to the console, one entry per line as
        /// label, word and <c>{count:sentence,sentence,...}</c>, then waits for
        /// a line of input.
        /// </summary>
        public void Render()
        {
            int position = 0;
            foreach (var item in this.ConcordanceList)
            {
                string label = (BuildLabel(position) + ".").PadRight(LabelColumnWidth);

                // PadRight never throws for long words, and the explicit space
                // keeps a long word separated from its statistics.
                Console.WriteLine("{0}{1} {2}", label, item.Text.PadRight(TextColumnWidth), item.Concordance);
                position++;
            }

            // Blocks until Enter is pressed so the output stays visible.
            Console.ReadLine();
        }
    }
}

ConcordanceItem
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace App.Test
{
    /// <summary>
    /// One concordance entry: a word, its occurrence count, and a
    /// comma-separated list of the sentence numbers where it occurs.
    /// </summary>
    public class ConcordanceItem
    {
        private string _text;
        private string _concordance;
        private int _noOfOccurence = 0;

        /// <summary>
        /// Creates an entry.
        /// </summary>
        /// <param name="text">The word this entry describes.</param>
        /// <param name="count">Initial occurrence count.</param>
        /// <param name="concordance">
        /// Initial comma-separated sentence numbers; null or empty means none.
        /// Leading and trailing commas are ignored.
        /// </param>
        public ConcordanceItem(string text, int count, string concordance)
        {
            _text = text;
            _concordance = TrimCommas(concordance);
            _noOfOccurence = count;

        }

        /// <summary>
        /// Gets the entry statistics formatted as <c>{count:n1,n2,...}</c>.
        /// </summary>
        public string Concordance
        {
            get
            {
                return string.Format("{{{0}:{1}}}", _noOfOccurence, _concordance);
            }
        }

        /// <summary>
        /// Gets the number of occurrences recorded so far.
        /// </summary>
        public int NoOfOccurence
        {
            get
            {
                return _noOfOccurence;
            }
        }

        /// <summary>
        /// Gets the word this entry describes.
        /// </summary>
        public string Text
        {
            get
            {
                return _text;
            }
        }

        /// <summary>
        /// Adds one to the occurrence count.
        /// </summary>
        public void Increment()
        {
            _noOfOccurence += 1;
        }

        /// <summary>
        /// Appends sentence numbers to the entry. (The misspelt method name is
        /// kept so existing callers continue to compile.)
        /// </summary>
        /// <param name="concordance">
        /// Comma-separated sentence numbers to append; null or empty is a no-op.
        /// </param>
        public void UpdateCondordance(string concordance)
        {
            string addition = TrimCommas(concordance);
            if (addition.Length == 0)
            {
                return;
            }

            // Only insert a separator when there is something to separate, so
            // the list never starts with a comma.
            _concordance = (_concordance.Length == 0)
                ? addition
                : string.Format("{0},{1}", _concordance, addition);
        }

        /// <summary>
        /// Returns the value without leading/trailing commas; null becomes empty.
        /// </summary>
        private static string TrimCommas(string value)
        {
            return (value == null) ? string.Empty : value.Trim(',');
        }
    }
}

Sentence
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace App.Test
{
    /// <summary>
    /// A single sentence together with the whitespace-separated words it
    /// contains.
    /// </summary>
    public class Sentence
    {
        private string _content;
        private string[] _words;

        /// <summary>
        /// Creates a sentence and splits it into words.
        /// </summary>
        /// <param name="sentence">
        /// The sentence text; null is treated as an empty sentence.
        /// </param>
        public Sentence(string sentence)
        {
            _content = sentence;

            // Split() with no arguments splits on any whitespace and yields
            // empty strings for consecutive separators. Both conditions must
            // hold (&&), and "." must be compared as a string: comparing a
            // string with the char '.' is never equal.
            _words = (sentence ?? string.Empty)
                .Split()
                .Where(x => x.Length > 0 && x != ".")
                .ToArray();

        }

        /// <summary>
        /// Gets the original sentence text, or an empty string if there is none.
        /// </summary>
        public string Content
        {
            get
            {
                return !string.IsNullOrEmpty(_content) ? _content : string.Empty;
            }
        }

        /// <summary>
        /// Gets the words of the sentence in order, excluding empty tokens and
        /// stand-alone periods. Never null: a sentence without words yields an
        /// empty array so callers can enumerate it unconditionally.
        /// </summary>
        public string[] Words
        {
            get
            {
                return _words;
            }
        }

    }
}

Program
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace App.Test
{
    // Console entry point: builds and prints the concordance of a fixed
    // sample paragraph.
    class Program
    {
        // Runs the demo; the command-line arguments are not used.
        static void Main(string[] args)
        {
            const string SampleText = "Given an arbitrary text document written in English, write a program that will generate a concordance, i.e. an alphabetical list of all word occurrences, labeled with word frequencies. Bonus: label each word with the sentence numbers in which each occurrence appeared.";

            // Commas are swapped for spaces before the text is handed over.
            string withoutCommas = SampleText.Replace(',', ' ');

            var manager = new ConcordanceManager(withoutCommas);
            manager.Generate();
            manager.Render();
        }
    }
}

Output:


Happy Coding!

Comments

Popular posts from this blog

Serializing JSON string to ExpandoObject

XML to ExpandoObject using Recursion in C#

Automatically Discover and Assign Parameter with Values to a Stored Procedure Call in C#