GfmlTrie.java 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. /*
  2. XOWA: the XOWA Offline Wiki Application
  3. Copyright (C) 2012-2017 gnosygnu@gmail.com
  4. XOWA is licensed under the terms of the General Public License (GPL) Version 3,
  5. or alternatively under the terms of the Apache License Version 2.0.
  6. You may use XOWA according to either of these licenses as is most appropriate
  7. for your project on a case-by-case basis.
  8. The terms of each license can be found in the source code repository:
  9. GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
  10. Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
  11. */
  12. package gplx.gfml;
  13. import gplx.types.basics.lists.Ordered_hash;
  14. import gplx.types.basics.lists.Ordered_hash_;
  15. import gplx.core.texts.CharStream;
  16. import gplx.types.errs.ErrUtl;
  17. import gplx.types.basics.utls.StringUtl;
  18. public class GfmlTrie {
  19. public String[] Symbols() {
  20. String[] rv = new String[symbols.Len()];
  21. for (int i = 0; i < rv.length; i++)
  22. rv[i] = StringUtl.Cast(symbols.GetAt(i));
  23. return rv;
  24. } Ordered_hash symbols = Ordered_hash_.New();
  25. public int LastMatchCount; // PERF: prop is faster than method
  26. public Object FindMatch(CharStream stream) {
  27. Object result = null; int moveCount = 0; LastMatchCount = 0;
  28. IntObjHash_base link = rootLink;
  29. while (stream.AtMid()) {
  30. Object found = link.Get_by(stream.Cur());
  31. if (found == null) break; // found is null; can happen for false matches; ex: <!-- reg, and <!-* is cur; goes to <!- before exit
  32. LastMatchCount++;
  33. IntObjHash_base foundAsLink = IntObjHash_base_.as_(found);
  34. if (foundAsLink != null) { // if (found is link) (ie: another link exists)
  35. result = foundAsLink.Bay(); // set .Bay as possible result; needed for short-long pairs; ex: < and <!-- exist; < found, but need to check next symbol for !
  36. link = foundAsLink; // make foundAsLink the current one
  37. moveCount++;
  38. stream.MoveNext();
  39. }
  40. else { // not a link; must be last
  41. result = found;
  42. break;
  43. }
  44. }
  45. if (moveCount > 0) stream.MoveBackBy(moveCount); // restore stream to original position; imitate idempotency
  46. return result;
  47. }
  48. public void Add(String symbol, Object data) {
  49. if (data == null) throw ErrUtl.NewArgs("null objects cannot be registered", "symbol", symbol);
  50. char[] ary = StringUtl.ToCharAry(symbol); int lastIndex = ary.length - 1;
  51. IntObjHash_base curLink = rootLink;
  52. for (int i = 0; i < ary.length; i++) {
  53. char c = ary[i];
  54. Object found = curLink.Get_by(c);
  55. IntObjHash_base foundAsLink = IntObjHash_base_.as_(found);
  56. if (i == lastIndex) { // lastChar
  57. if (found != null) { // slot is occupied
  58. if (foundAsLink != null) // found is link; occurs when symbol is shorter than existing: ex: adding '<' when '<!--' exists)
  59. foundAsLink.Bay_set(data); // place found in link's bay
  60. else // found is makr; occurs when symbol is the same as existing; ex: adding '<!' when '<!' exists
  61. curLink.Set(c, data); // place found in slot (replaces slot)
  62. }
  63. else // slot is unoccupied
  64. curLink.Add(c, data); // place found in slot
  65. }
  66. else { // not lastChar
  67. if (foundAsLink == null) { // next link does not exist (NOTE: next link is necessary, since char is not lastChar)
  68. foundAsLink = IntObjHash_base_.new_(); // create next link
  69. if (found != null) { // slot is occupied; occurs when symbol is longer than existing: ex: adding '<!--' when '<' exists
  70. foundAsLink.Bay_set(found); // transplant occupied found to link's Bay
  71. curLink.Set(c, foundAsLink); // place new link in slot
  72. }
  73. else // slot is unoccupied
  74. curLink.Add(c, foundAsLink); // place found in slot
  75. }
  76. curLink = foundAsLink;
  77. }
  78. }
  79. symbols.AddIfDupeUseNth(symbol, symbol);
  80. }
  81. public void Del(String symbol) {
  82. char[] ary = StringUtl.ToCharAry(symbol); int lastIndex = ary.length - 1;
  83. IntObjHash_base[] linkAry = new IntObjHash_base[ary.length]; // first, get linkAry -- one link for each symbol
  84. IntObjHash_base link = rootLink;
  85. for (int i = 0; i < ary.length; i++) {
  86. char c = ary[i];
  87. linkAry[i] = link;
  88. link = IntObjHash_base_.as_(link.Get_by(c));
  89. if (link == null) break; // c does not have nextHash; break
  90. }
  91. IntObjHash_base nextHash = null;
  92. for (int i = lastIndex; i >= 0; i--) { // remove each char from hashes; must move backwards
  93. char c = ary[i];
  94. IntObjHash_base curLink = linkAry[i];
  95. Object found = curLink.Get_by(c);
  96. IntObjHash_base foundAsLink = IntObjHash_base_.as_(found);
  97. if (nextHash != null && nextHash.Bay() != null) // occurs when long is dropped; ex: '<-' and '<'; '<-' dropped; <'s .Bay in '<-' chain must be transplanted to '<' .Bay
  98. curLink.Set(c, nextHash.Bay());
  99. else if (foundAsLink != null && foundAsLink.Bay() != null) // occurs when short is dropped; ex: '<-' and '<'; '<' dropped; <'s .Bay must be transplanted to '<' .Bay in '<-' chain
  100. foundAsLink.Bay_set(found);
  101. else // no long/short overlap; simply remove
  102. curLink.Del(c);
  103. nextHash = curLink;
  104. }
  105. symbols.Del(symbol);
  106. }
  107. public void Clear() {rootLink.Clear();}
  108. IntObjHash_base rootLink = IntObjHash_base_.new_();
  109. public static GfmlTrie new_() {return new GfmlTrie();}
  110. }