001 package org.maltparser.core.syntaxgraph.feature;
002
003 import java.util.LinkedHashMap;
004 import java.util.Map;
005 import org.maltparser.core.exception.MaltChainedException;
006 import org.maltparser.core.feature.function.AddressFunction;
007 import org.maltparser.core.feature.function.FeatureFunction;
008 import org.maltparser.core.feature.value.AddressValue;
009 import org.maltparser.core.feature.value.FeatureValue;
010 import org.maltparser.core.feature.value.SingleFeatureValue;
011 import org.maltparser.core.io.dataformat.ColumnDescription;
012 import org.maltparser.core.symbol.SymbolTable;
013 import org.maltparser.core.symbol.SymbolTableHandler;
014 import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId;
015 import org.maltparser.core.syntaxgraph.SyntaxGraphException;
016 import org.maltparser.core.syntaxgraph.node.DependencyNode;
017
018 public class DistanceFeature implements FeatureFunction {
019 protected AddressFunction addressFunction1;
020 protected AddressFunction addressFunction2;
021 protected SymbolTableHandler tableHandler;
022 protected SymbolTable table;
023 protected SingleFeatureValue featureValue;
024 protected String normalizationString;
025 protected Map<Integer,String> normalization;
026
027
028 public DistanceFeature(SymbolTableHandler tableHandler) throws MaltChainedException {
029 super();
030 featureValue = new SingleFeatureValue(this);
031 setTableHandler(tableHandler);
032 normalization = new LinkedHashMap<Integer,String>();
033 }
034
035 /**
036 * Initialize the distance feature function
037 *
038 * @param arguments an array of arguments with the type returned by getParameterTypes()
039 * @throws MaltChainedException
040 */
041 public void initialize(Object[] arguments) throws MaltChainedException {
042 if (arguments.length != 3) {
043 throw new SyntaxGraphException("Could not initialize DistanceFeature: number of arguments is not correct. ");
044 }
045 // Checks that the two arguments are address functions
046 if (!(arguments[0] instanceof AddressFunction)) {
047 throw new SyntaxGraphException("Could not initialize DistanceFeature: the first argument is not an address function. ");
048 }
049 if (!(arguments[1] instanceof AddressFunction)) {
050 throw new SyntaxGraphException("Could not initialize DistanceFeature: the second argument is not an address function. ");
051 }
052 if (!(arguments[2] instanceof java.lang.String)) {
053 throw new SyntaxGraphException("Could not initialize DistanceFeature: the third argument is not a string. ");
054 }
055 setAddressFunction1((AddressFunction)arguments[0]);
056 setAddressFunction2((AddressFunction)arguments[1]);
057
058 // Creates a symbol table called "DISTANCE" using one null value
059 setSymbolTable(tableHandler.addSymbolTable("DISTANCE", ColumnDescription.INPUT, "one"));
060 normalizationString = (String)arguments[2];
061 String[] items = normalizationString.split("\\|");
062
063 if (items.length <= 0 || !items[0].equals("0")) {
064 throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a list of integer values separated with | and the first element must be 0.");
065 }
066 int tmp = -1;
067 for (int i = 0; i < items.length; i++) {
068 int v;
069 try {
070 v = Integer.parseInt(items[i]);
071 } catch (NumberFormatException e) {
072 throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |", e);
073 }
074 normalization.put(v, ">="+v);
075 table.addSymbol(">="+v);
076 if (tmp != -1 && tmp >= v) {
077 throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |");
078 }
079 tmp = v;
080 }
081 }
082
083 /**
084 * Returns an array of class types used by the feature extraction system to invoke initialize with
085 * correct arguments.
086 *
087 * @return an array of class types
088 */
089 public Class<?>[] getParameterTypes() {
090 Class<?>[] paramTypes = { org.maltparser.core.feature.function.AddressFunction.class,
091 org.maltparser.core.feature.function.AddressFunction.class,
092 java.lang.String.class};
093 return paramTypes;
094 }
095
096 /**
097 * Returns the string representation of the integer <code>code</code> according to the distance feature function.
098 *
099 * @param code the integer representation of the symbol
100 * @return the string representation of the integer <code>code</code> according to the distance feature function.
101 * @throws MaltChainedException
102 */
103 public String getSymbol(int code) throws MaltChainedException {
104 return table.getSymbolCodeToString(code);
105 }
106
107 /**
108 * Returns the integer representation of the string <code>symbol</code> according to the distance feature function.
109 *
110 * @param symbol the string representation of the symbol
111 * @return the integer representation of the string <code>symbol</code> according to the distance feature function.
112 * @throws MaltChainedException
113 */
114 public int getCode(String symbol) throws MaltChainedException {
115 return table.getSymbolStringToCode(symbol);
116 }
117
118 /**
119 * Cause the distance feature function to update the cardinality of the feature value.
120 *
121 * @throws MaltChainedException
122 */
123 public void updateCardinality() {
124 featureValue.setCardinality(table.getValueCounter());
125 }
126
127 /**
128 * Cause the feature function to update the feature value.
129 *
130 * @throws MaltChainedException
131 */
132 public void update() throws MaltChainedException {
133 // Retrieve the address value
134 final AddressValue arg1 = addressFunction1.getAddressValue();
135 final AddressValue arg2 = addressFunction2.getAddressValue();
136
137 // if arg1 or arg2 is null, then set a NO_NODE null value as feature value
138 if (arg1.getAddress() == null || arg2.getAddress() == null) {
139 featureValue.setCode(table.getNullValueCode(NullValueId.NO_NODE));
140 featureValue.setSymbol(table.getNullValueSymbol(NullValueId.NO_NODE));
141 featureValue.setKnown(true);
142 featureValue.setNullValue(true);
143 } else {
144 // Unfortunately this method takes a lot of time arg1.getAddressClass().asSubclass(org.maltparser.core.syntaxgraph.node.DependencyNode.class);
145 // Cast the address arguments to dependency nodes
146 final DependencyNode node1 = (DependencyNode)arg1.getAddress();
147 final DependencyNode node2 = (DependencyNode)arg2.getAddress();
148
149 if (!node1.isRoot() && !node2.isRoot()) {
150 // Calculates the distance
151 final int index1 = node1.getIndex();
152 final int index2 = node2.getIndex();
153 final int distance = Math.abs(index1-index2);
154
155
156 int lower = -1;
157 boolean f = false;
158 for (Integer upper : normalization.keySet()) {
159 if (distance >= lower && distance < upper) {
160 featureValue.setCode(table.getSymbolStringToCode(normalization.get(lower)));
161 featureValue.setSymbol(normalization.get(lower));
162 f = true;
163 break;
164 }
165 lower = upper;
166 }
167 if (f == false) {
168 featureValue.setCode(table.getSymbolStringToCode(normalization.get(lower)));
169 featureValue.setSymbol(normalization.get(lower));
170 }
171
172 // Tells the feature value that the feature is known and is not a null value
173 featureValue.setKnown(true);
174 featureValue.setNullValue(false);
175
176 } else {
177 // if node1 or node2 is a root node, set a ROOT_NODE null value as feature value
178 featureValue.setCode(table.getNullValueCode(NullValueId.ROOT_NODE));
179 featureValue.setSymbol(table.getNullValueSymbol(NullValueId.ROOT_NODE));
180 featureValue.setKnown(true);
181 featureValue.setNullValue(true);
182 }
183 }
184 }
185
186 /**
187 * Returns the feature value
188 *
189 * @return the feature value
190 */
191 public FeatureValue getFeatureValue() {
192 return featureValue;
193 }
194
195 /**
196 * Returns the symbol table used by the distance feature function
197 *
198 * @return the symbol table used by the distance feature function
199 */
200 public SymbolTable getSymbolTable() {
201 return table;
202 }
203
204 /**
205 * Returns the address function 1 (argument 1)
206 *
207 * @return the address function 1 (argument 1)
208 */
209 public AddressFunction getAddressFunction1() {
210 return addressFunction1;
211 }
212
213
214 /**
215 * Sets the address function 1 (argument 1)
216 *
217 * @param addressFunction1 a address function 1 (argument 1)
218 */
219 public void setAddressFunction1(AddressFunction addressFunction1) {
220 this.addressFunction1 = addressFunction1;
221 }
222
223 /**
224 * Returns the address function 2 (argument 2)
225 *
226 * @return the address function 1 (argument 2)
227 */
228 public AddressFunction getAddressFunction2() {
229 return addressFunction2;
230 }
231
232 /**
233 * Sets the address function 2 (argument 2)
234 *
235 * @param addressFunction2 a address function 2 (argument 2)
236 */
237 public void setAddressFunction2(AddressFunction addressFunction2) {
238 this.addressFunction2 = addressFunction2;
239 }
240
241 /**
242 * Returns symbol table handler
243 *
244 * @return a symbol table handler
245 */
246 public SymbolTableHandler getTableHandler() {
247 return tableHandler;
248 }
249
250 /**
251 * Sets the symbol table handler
252 *
253 * @param tableHandler a symbol table handler
254 */
255 public void setTableHandler(SymbolTableHandler tableHandler) {
256 this.tableHandler = tableHandler;
257 }
258
259 /**
260 * Sets the symbol table used by the distance feature function
261 *
262 * @param table
263 */
264 public void setSymbolTable(SymbolTable table) {
265 this.table = table;
266 }
267
268 public boolean equals(Object obj) {
269 if (this == obj)
270 return true;
271 if (obj == null)
272 return false;
273 if (getClass() != obj.getClass())
274 return false;
275 return obj.toString().equals(this.toString());
276 }
277
278 public int hashCode() {
279 return 217 + (null == toString() ? 0 : toString().hashCode());
280 }
281
282 public String toString() {
283 final StringBuilder sb = new StringBuilder();
284 sb.append("Distance(");
285 sb.append(addressFunction1.toString());
286 sb.append(", ");
287 sb.append(addressFunction2.toString());
288 sb.append(", ");
289 sb.append(normalizationString);
290 sb.append(')');
291 return sb.toString();
292 }
293 }
294