LemmaDict.java
3.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
package LIA_topic_seg;
import java.util.*;
/**
 * Dictionary of lemmas. Each lemma is described by a code (its index number
 * in the dictionary), its textual content, the maximal weight it was given in
 * the text, the number of times it occurs in the text, and the list of
 * sentence indices at which it was recorded.
 *
 * Codes are handed out in insertion order, starting at 0, so a lemma's code
 * is also its position in the internal list.
 */
class LemmaDict
{
    //--------------------------------------------------------------------------
    /**
     * One dictionary entry. Instances are only created by
     * {@link LemmaDict#add(String, float, int)}.
     *
     * Declared static because an entry never touches the enclosing
     * dictionary, so it does not need a hidden reference to it.
     */
    static class Lemma
    {
        /** Index number of this lemma in the dictionary. */
        int code;
        /** Text of the lemma. */
        String content;
        /** Largest weight passed to add() for this lemma so far. */
        float maxWeight;
        /** Number of times add() was called for this lemma. */
        int occurences = 1;
        /**
         * Sentence indices in the order they were added. One entry is
         * appended per add() call, so the same index may appear several times.
         */
        private final List<Integer> sentenceIdxList = new ArrayList<Integer>();

        private Lemma(int c, float w, String s, int i)
        {
            code = c;
            maxWeight = w;
            content = s;
            sentenceIdxList.add(i);
        }

        /** @return number of recorded sentence indices (one per add() call) */
        int sentenceCount()
        {
            return sentenceIdxList.size();
        }

        /**
         * @param i position in the recorded list, 0 <= i < sentenceCount()
         * @return the sentence index recorded at position i
         * @throws IndexOutOfBoundsException if i is out of range
         */
        int sentenceIdx(int i)
        {
            return sentenceIdxList.get(i);
        }

        /**
         * Distance between the first and the last recorded sentence index,
         * divided (integer division) by the number of recorded entries,
         * plus one.
         *
         * NOTE(review): this only measures a spread if indices are added in
         * non-decreasing order -- confirm against the callers of add().
         */
        int localHiatus()
        {
            int size = sentenceIdxList.size();
            int first = sentenceIdxList.get(0);
            int last = sentenceIdxList.get(size - 1);
            return (last - first) / size + 1;
        }
    }

    /** Lemmas by code: the element at position n has code n. */
    private final List<Lemma> _array = new ArrayList<Lemma>();
    /**
     * Lemma text to code. Kept as a Hashtable because it rejects null keys,
     * which is what makes add(null, ...) and lemmaCode(null) fail fast.
     */
    private final Map<String, Integer> _table = new Hashtable<String, Integer>();

    //--------------------------------------------------------------------------
    /**
     * Looks up the code of a lemma by its text.
     *
     * @param content text of the lemma
     * @return the code assigned to that lemma
     * @throws NullPointerException if content is null or is not in the
     *         dictionary. The exception type is the one the implicit
     *         unboxing of a missing entry has always produced; it now
     *         carries the offending text in its message.
     */
    int lemmaCode(String content)
    {
        Integer code = _table.get(content);
        if (code == null)
        {
            throw new NullPointerException("unknown lemma: " + content);
        }
        return code;
    }

    //--------------------------------------------------------------------------
    /**
     * @param lemmaCode code of the lemma, 0 <= lemmaCode < lemmaCount()
     * @return the entry holding that code
     * @throws IndexOutOfBoundsException if no lemma has this code
     */
    Lemma lemmaObject(int lemmaCode)
    {
        return _array.get(lemmaCode);
    }

    //--------------------------------------------------------------------------
    /** @return number of distinct lemmas in the dictionary */
    int lemmaCount()
    {
        return _array.size();
    }

    //--------------------------------------------------------------------------
    /**
     * Records one occurrence of a lemma.
     *
     * The first time a text is seen, a new entry is created with the next
     * free code. On later calls the existing entry is updated: its maximal
     * weight is raised if needed, its occurrence counter is incremented and
     * the sentence index is appended to its list.
     *
     * @param content     text of the lemma (must not be null)
     * @param weight      weight of this occurrence
     * @param sentenceIdx index of the sentence containing this occurrence
     * @return the code of the lemma, new or existing
     * @throws NullPointerException if content is null
     */
    int add(String content, float weight, int sentenceIdx)
    {
        Integer known = _table.get(content);
        if (known == null)
        {
            int code = _array.size();
            // Private copy of the key. NOTE(review): presumably meant to
            // detach the key from a larger buffer on old JVMs -- confirm
            // before removing.
            String s = new String(content);
            _table.put(s, code);
            _array.add(new Lemma(code, weight, s, sentenceIdx));
            return code;
        }

        Lemma l = _array.get(known);
        l.maxWeight = Math.max(weight, l.maxWeight);
        l.occurences++;
        l.sentenceIdxList.add(sentenceIdx);
        return known;
    }

    //--------------------------------------------------------------------------
    /**
     * Dumps the dictionary: the default Object description on the first
     * line, then one "LEMMA ..." line per entry in code order.
     */
    @Override
    public String toString()
    {
        StringBuilder out = new StringBuilder();
        out.append(super.toString()).append("\n");
        for (Lemma l : _array)
        {
            out.append("LEMMA")
               .append(" code(").append(l.code).append(")")
               .append(" content(").append(l.content).append(")")
               .append(" maxWeight(").append(l.maxWeight).append(")")
               .append(" occurences(").append(l.occurences).append(")")
               .append(" localHiatus(").append(l.localHiatus()).append(")")
               .append(" sentences#(");
            for (int j = 0; j < l.sentenceCount(); j++)
            {
                if (j != 0)
                {
                    out.append(",");
                }
                // append(int) writes the decimal digits, unlike
                // Writer.write(int) which would emit a single character.
                out.append(l.sentenceIdx(j));
            }
            out.append(")\n");
        }
        return out.toString();
    }
    //--------------------------------------------------------------------------
}