1. package lexers
    
  2. 
    
  3. import (
    
  4. 	"regexp"
    
  5. 	"strings"
    
  6. 	"unicode/utf8"
    
  7. 
    
  8. 	"github.com/dlclark/regexp2"
    
  9. 
    
  10. 	. "github.com/alecthomas/chroma/v2" // nolint
    
  11. )
    
  12. 
    
  13. // Raku lexer.
    
  14. var Raku Lexer = Register(MustNewLexer(
    
  15. 	&Config{
    
  16. 		Name:    "Raku",
    
  17. 		Aliases: []string{"perl6", "pl6", "raku"},
    
  18. 		Filenames: []string{
    
  19. 			"*.pl", "*.pm", "*.nqp", "*.p6", "*.6pl", "*.p6l", "*.pl6", "*.6pm",
    
  20. 			"*.p6m", "*.pm6", "*.t", "*.raku", "*.rakumod", "*.rakutest", "*.rakudoc",
    
  21. 		},
    
  22. 		MimeTypes: []string{
    
  23. 			"text/x-perl6", "application/x-perl6",
    
  24. 			"text/x-raku", "application/x-raku",
    
  25. 		},
    
  26. 		DotAll: true,
    
  27. 	},
    
  28. 	rakuRules,
    
  29. ))
    
  30. 
    
  31. func rakuRules() Rules {
    
  32. 	type RakuToken int
    
  33. 
    
  34. 	const (
    
  35. 		rakuQuote RakuToken = iota
    
  36. 		rakuNameAttribute
    
  37. 		rakuPod
    
  38. 		rakuPodFormatter
    
  39. 		rakuPodDeclaration
    
  40. 		rakuMultilineComment
    
  41. 		rakuMatchRegex
    
  42. 		rakuSubstitutionRegex
    
  43. 	)
    
  44. 
    
  45. 	const (
    
  46. 		colonPairOpeningBrackets = `(?:<<|<|«|\(|\[|\{)`
    
  47. 		colonPairClosingBrackets = `(?:>>|>|»|\)|\]|\})`
    
  48. 		colonPairPattern         = `(?<!:)(?<colon>:)(?<key>\w[\w'-]*)(?<opening_delimiters>` + colonPairOpeningBrackets + `)`
    
  49. 		colonPairLookahead       = `(?=(:['\w-]+` +
    
  50. 			colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `)?`
    
  51. 		namePattern           = `(?:(?!` + colonPairPattern + `)(?:::|[\w':-]))+`
    
  52. 		variablePattern       = `[$@%&]+[.^:?=!~]?` + namePattern
    
  53. 		globalVariablePattern = `[$@%&]+\*` + namePattern
    
  54. 	)
    
  55. 
    
  56. 	keywords := []string{
    
  57. 		`BEGIN`, `CATCH`, `CHECK`, `CLOSE`, `CONTROL`, `DOC`, `END`, `ENTER`, `FIRST`, `INIT`,
    
  58. 		`KEEP`, `LAST`, `LEAVE`, `NEXT`, `POST`, `PRE`, `QUIT`, `UNDO`, `anon`, `augment`, `but`,
    
  59. 		`class`, `constant`, `default`, `does`, `else`, `elsif`, `enum`, `for`, `gather`, `given`,
    
  60. 		`grammar`, `has`, `if`, `import`, `is`, `of`, `let`, `loop`, `made`, `make`, `method`,
    
  61. 		`module`, `multi`, `my`, `need`, `orwith`, `our`, `proceed`, `proto`, `repeat`, `require`,
    
  62. 		`where`, `return`, `return-rw`, `returns`, `->`, `-->`, `role`, `state`, `sub`, `no`,
    
  63. 		`submethod`, `subset`, `succeed`, `supersede`, `try`, `unit`, `unless`, `until`,
    
  64. 		`use`, `when`, `while`, `with`, `without`, `export`, `native`, `repr`, `required`, `rw`,
    
  65. 		`symbol`, `default`, `cached`, `DEPRECATED`, `dynamic`, `hidden-from-backtrace`, `nodal`,
    
  66. 		`pure`, `raw`, `start`, `react`, `supply`, `whenever`, `also`, `rule`, `token`, `regex`,
    
  67. 		`dynamic-scope`, `built`, `temp`,
    
  68. 	}
    
  69. 
    
  70. 	keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...)
    
  71. 
    
  72. 	wordOperators := []string{
    
  73. 		`X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`,
    
  74. 		`gt`, `le`, `leg`, `lt`, `mod`, `ne`, `or`, `orelse`, `x`, `xor`, `xx`, `gcd`, `lcm`,
    
  75. 		`but`, `min`, `max`, `^fff`, `fff^`, `fff`, `^ff`, `ff^`, `ff`, `so`, `not`, `unicmp`,
    
  76. 		`TR`, `o`, `(&)`, `(.)`, `(|)`, `(+)`, `(-)`, `(^)`, `coll`, `(elem)`, `(==)`,
    
  77. 		`(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`,
    
  78. 	}
    
  79. 
    
  80. 	wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...)
    
  81. 
    
  82. 	operators := []string{
    
  83. 		`++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`,
    
  84. 		`+<`, `+>`, `~&`, `~<`, `~>`, `?&`, `+|`, `+^`, `~|`, `~^`, `?`, `?|`, `?^`, `&`, `^`,
    
  85. 		`<=>`, `^…^`, `^…`, `…^`, ``, `...`, `...^`, `^...`, `^...^`, `..`, `..^`, `^..`, `^..^`,
    
  86. 		`::=`, `:=`, `!=`, `==`, `<=`, `<`, `>=`, `>`, `~~`, `===`, `&&`, `||`, `|`, `^^`, `//`,
    
  87. 		`??`, `!!`, `^fff^`, `^ff^`, `<==`, `==>`, `<<==`, `==>>`, `=>`, `=`, `<<`, `«`, `>>`, `»`,
    
  88. 		`,`, `>>.`, `».`, `.&`, `.=`, `.^`, `.?`, `.+`, `.*`, `.`, ``, ``, ``, ``, ``, ``,
    
  89. 		``, ``, ``, ``, `=:=`, `=~=`, ``, ``, ``, ``, ``, ``, ``, ``, ``, ``, ``,
    
  90. 		``, ``, ``, ``, `:`, `!!!`, `???`, `¯`, `×`, `÷`, ``, ``, ``,
    
  91. 	}
    
  92. 
    
  93. 	operatorsPattern := Words(``, ``, operators...)
    
  94. 
    
  95. 	builtinTypes := []string{
    
  96. 		`False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`,
    
  97. 		`atomicint`, `Attribute`, `Backtrace`, `Backtrace::Frame`, `Bag`, `Baggy`, `BagHash`,
    
  98. 		`Blob`, `Block`, `Bool`, `Buf`, `Callable`, `CallFrame`, `Cancellation`, `Capture`,
    
  99. 		`CArray`, `Channel`, `Code`, `compiler`, `Complex`, `ComplexStr`, `CompUnit`,
    
  100. 		`CompUnit::PrecompilationRepository`, `CompUnit::Repository`, `Empty`,
    
  101. 		`CompUnit::Repository::FileSystem`, `CompUnit::Repository::Installation`, `Cool`,
    
  102. 		`CurrentThreadScheduler`, `CX::Warn`, `CX::Take`, `CX::Succeed`, `CX::Return`, `CX::Redo`,
    
  103. 		`CX::Proceed`, `CX::Next`, `CX::Last`, `CX::Emit`, `CX::Done`, `Cursor`, `Date`, `Dateish`,
    
  104. 		`DateTime`, `Distribution`, `Distribution::Hash`, `Distribution::Locally`,
    
  105. 		`Distribution::Path`, `Distribution::Resource`, `Distro`, `Duration`, `Encoding`,
    
  106. 		`Encoding::GlobalLexerRegistry`, `Endian`, `Enumeration`, `Exception`, `Failure`, `FatRat`, `Grammar`,
    
  107. 		`Hash`, `HyperWhatever`, `Instant`, `Int`, `int`, `int16`, `int32`, `int64`, `int8`, `str`,
    
  108. 		`IntStr`, `IO`, `IO::ArgFiles`, `IO::CatHandle`, `IO::Handle`, `IO::Notification`,
    
  109. 		`IO::Notification::Change`, `IO::Path`, `IO::Path::Cygwin`, `IO::Path::Parts`,
    
  110. 		`IO::Path::QNX`, `IO::Path::Unix`, `IO::Path::Win32`, `IO::Pipe`, `IO::Socket`,
    
  111. 		`IO::Socket::Async`, `IO::Socket::Async::ListenSocket`, `IO::Socket::INET`, `IO::Spec`,
    
  112. 		`IO::Spec::Cygwin`, `IO::Spec::QNX`, `IO::Spec::Unix`, `IO::Spec::Win32`, `IO::Special`,
    
  113. 		`Iterable`, `Iterator`, `Junction`, `Kernel`, `Label`, `List`, `Lock`, `Lock::Async`,
    
  114. 		`Lock::ConditionVariable`, `long`, `longlong`, `Macro`, `Map`, `Match`,
    
  115. 		`Metamodel::AttributeContainer`, `Metamodel::C3MRO`, `Metamodel::ClassHOW`,
    
  116. 		`Metamodel::ConcreteRoleHOW`, `Metamodel::CurriedRoleHOW`, `Metamodel::DefiniteHOW`,
    
  117. 		`Metamodel::Documenting`, `Metamodel::EnumHOW`, `Metamodel::Finalization`,
    
  118. 		`Metamodel::MethodContainer`, `Metamodel::Mixins`, `Metamodel::MROBasedMethodDispatch`,
    
  119. 		`Metamodel::MultipleInheritance`, `Metamodel::Naming`, `Metamodel::Primitives`,
    
  120. 		`Metamodel::PrivateMethodContainer`, `Metamodel::RoleContainer`, `Metamodel::RolePunning`,
    
  121. 		`Metamodel::Stashing`, `Metamodel::Trusting`, `Metamodel::Versioning`, `Method`, `Mix`,
    
  122. 		`MixHash`, `Mixy`, `Mu`, `NFC`, `NFD`, `NFKC`, `NFKD`, `Nil`, `Num`, `num32`, `num64`,
    
  123. 		`Numeric`, `NumStr`, `ObjAt`, `Order`, `Pair`, `Parameter`, `Perl`, `Pod::Block`,
    
  124. 		`Pod::Block::Code`, `Pod::Block::Comment`, `Pod::Block::Declarator`, `Pod::Block::Named`,
    
  125. 		`Pod::Block::Para`, `Pod::Block::Table`, `Pod::Heading`, `Pod::Item`, `Pointer`,
    
  126. 		`Positional`, `PositionalBindFailover`, `Proc`, `Proc::Async`, `Promise`, `Proxy`,
    
  127. 		`PseudoStash`, `QuantHash`, `RaceSeq`, `Raku`, `Range`, `Rat`, `Rational`, `RatStr`,
    
  128. 		`Real`, `Regex`, `Routine`, `Routine::WrapHandle`, `Scalar`, `Scheduler`, `Semaphore`,
    
  129. 		`Seq`, `Sequence`, `Set`, `SetHash`, `Setty`, `Signature`, `size_t`, `Slip`, `Stash`,
    
  130. 		`Str`, `StrDistance`, `Stringy`, `Sub`, `Submethod`, `Supplier`, `Supplier::Preserving`,
    
  131. 		`Supply`, `Systemic`, `Tap`, `Telemetry`, `Telemetry::Instrument::Thread`,
    
  132. 		`Telemetry::Instrument::ThreadPool`, `Telemetry::Instrument::Usage`, `Telemetry::Period`,
    
  133. 		`Telemetry::Sampler`, `Thread`, `Test`, `ThreadPoolScheduler`, `UInt`, `uint16`, `uint32`,
    
  134. 		`uint64`, `uint8`, `Uni`, `utf8`, `ValueObjAt`, `Variable`, `Version`, `VM`, `Whatever`,
    
  135. 		`WhateverCode`, `WrapHandle`, `NativeCall`,
    
  136. 		// Pragmas
    
  137. 		`precompilation`, `experimental`, `worries`, `MONKEY-TYPING`, `MONKEY-SEE-NO-EVAL`,
    
  138. 		`MONKEY-GUTS`, `fatal`, `lib`, `isms`, `newline`, `nqp`, `soft`,
    
  139. 		`strict`, `trace`, `variables`,
    
  140. 	}
    
  141. 
    
  142. 	builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...)
    
  143. 
    
  144. 	builtinRoutines := []string{
    
  145. 		`ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`,
    
  146. 		`acosec`, `acosech`, `acosh`, `acotan`, `acotanh`, `acquire`, `act`, `action`, `actions`,
    
  147. 		`add`, `add_attribute`, `add_enum_value`, `add_fallback`, `add_method`, `add_parent`,
    
  148. 		`add_private_method`, `add_role`, `add_stash`, `add_trustee`, `addendum`, `adverb`, `after`,
    
  149. 		`all`, `allocate`, `allof`, `allowed`, `alternative-names`, `annotations`, `antipair`,
    
  150. 		`antipairs`, `any`, `anyof`, `api`, `app_lifetime`, `append`, `arch`, `archetypes`,
    
  151. 		`archname`, `args`, `ARGS-TO-CAPTURE`, `arity`, `Array`, `asec`, `asech`, `asin`, `asinh`,
    
  152. 		`ASSIGN-KEY`, `ASSIGN-POS`, `assuming`, `ast`, `at`, `atan`, `atan2`, `atanh`, `AT-KEY`,
    
  153. 		`atomic-assign`, `atomic-dec-fetch`, `atomic-fetch`, `atomic-fetch-add`, `atomic-fetch-dec`,
    
  154. 		`atomic-fetch-inc`, `atomic-fetch-sub`, `atomic-inc-fetch`, `AT-POS`, `attributes`, `auth`,
    
  155. 		`await`, `backend`, `backtrace`, `Bag`, `bag`, `Baggy`, `BagHash`, `bail-out`, `base`,
    
  156. 		`basename`, `base-repeating`, `base_type`, `batch`, `BIND-KEY`, `BIND-POS`, `bind-stderr`,
    
  157. 		`bind-stdin`, `bind-stdout`, `bind-udp`, `bits`, `bless`, `block`, `Bool`, `bool-only`,
    
  158. 		`bounds`, `break`, `Bridge`, `broken`, `BUILD`, `TWEAK`, `build-date`, `bytes`, `cache`,
    
  159. 		`callframe`, `calling-package`, `CALL-ME`, `callsame`, `callwith`, `can`, `cancel`,
    
  160. 		`candidates`, `cando`, `can-ok`, `canonpath`, `caps`, `caption`, `Capture`, `capture`,
    
  161. 		`cas`, `catdir`, `categorize`, `categorize-list`, `catfile`, `catpath`, `cause`, `ceiling`,
    
  162. 		`cglobal`, `changed`, `Channel`, `channel`, `chars`, `chdir`, `child`, `child-name`,
    
  163. 		`child-typename`, `chmod`, `chomp`, `chop`, `chr`, `chrs`, `chunks`, `cis`, `classify`,
    
  164. 		`classify-list`, `cleanup`, `clone`, `close`, `closed`, `close-stdin`, `cmp-ok`, `code`,
    
  165. 		`codename`, `codes`, `coerce_type`, `coll`, `collate`, `column`, `comb`, `combinations`,
    
  166. 		`command`, `comment`, `compiler`, `Complex`, `compose`, `composalizer`, `compose_type`,
    
  167. 		`compose_values`, `composer`, `compute_mro`, `condition`, `config`, `configure_destroy`,
    
  168. 		`configure_type_checking`, `conj`, `connect`, `constraints`, `construct`, `contains`,
    
  169. 		`content`, `contents`, `copy`, `cos`, `cosec`, `cosech`, `cosh`, `cotan`, `cotanh`, `count`,
    
  170. 		`count-only`, `cpu-cores`, `cpu-usage`, `CREATE`, `create_type`, `cross`, `cue`, `curdir`,
    
  171. 		`curupdir`, `d`, `Date`, `DateTime`, `day`, `daycount`, `day-of-month`, `day-of-week`,
    
  172. 		`day-of-year`, `days-in-month`, `dd-mm-yyyy`, `declaration`, `decode`, `decoder`, `deepmap`,
    
  173. 		`default`, `defined`, `DEFINITE`, `definite`, `delayed`, `delete`, `delete-by-compiler`,
    
  174. 		`DELETE-KEY`, `DELETE-POS`, `denominator`, `desc`, `DESTROY`, `destroyers`, `devnull`,
    
  175. 		`diag`, `did-you-mean`, `die`, `dies-ok`, `dir`, `dirname`, `distribution`, `dir-sep`,
    
  176. 		`DISTROnames`, `do`, `does`, `does-ok`, `done`, `done-testing`, `duckmap`, `dynamic`, `e`,
    
  177. 		`eager`, `earlier`, `elems`, `emit`, `enclosing`, `encode`, `encoder`, `encoding`, `end`,
    
  178. 		`endian`, `ends-with`, `enum_from_value`, `enum_value_list`, `enum_values`, `enums`, `EOF`,
    
  179. 		`eof`, `EVAL`, `eval-dies-ok`, `EVALFILE`, `eval-lives-ok`, `event`, `exception`,
    
  180. 		`excludes-max`, `excludes-min`, `EXISTS-KEY`, `EXISTS-POS`, `exit`, `exitcode`, `exp`,
    
  181. 		`expected`, `explicitly-manage`, `expmod`, `export_callback`, `extension`, `f`, `fail`,
    
  182. 		`FALLBACK`, `fails-like`, `fc`, `feature`, `file`, `filename`, `files`, `find`,
    
  183. 		`find_method`, `find_method_qualified`, `finish`, `first`, `flat`, `first-date-in-month`,
    
  184. 		`flatmap`, `flip`, `floor`, `flunk`, `flush`, `flush_cache`, `fmt`, `format`, `formatter`,
    
  185. 		`free-memory`, `freeze`, `from`, `from-list`, `from-loop`, `from-posix`, `from-slurpy`,
    
  186. 		`full`, `full-barrier`, `GENERATE-USAGE`, `generate_mixin`, `get`, `get_value`, `getc`,
    
  187. 		`gist`, `got`, `grab`, `grabpairs`, `grep`, `handle`, `handled`, `handles`, `hardware`,
    
  188. 		`has_accessor`, `Hash`, `hash`, `head`, `headers`, `hh-mm-ss`, `hidden`, `hides`, `hostname`,
    
  189. 		`hour`, `how`, `hyper`, `id`, `illegal`, `im`, `in`, `in-timezone`, `indent`, `index`,
    
  190. 		`indices`, `indir`, `infinite`, `infix`, `postcirumfix`, `cicumfix`, `install`,
    
  191. 		`install_method_cache`, `Instant`, `instead`, `Int`, `int-bounds`, `interval`, `in-timezone`,
    
  192. 		`invalid-str`, `invert`, `invocant`, `IO`, `IO::Notification.watch-path`, `is_trusted`,
    
  193. 		`is_type`, `isa`, `is-absolute`, `isa-ok`, `is-approx`, `is-deeply`, `is-hidden`,
    
  194. 		`is-initial-thread`, `is-int`, `is-lazy`, `is-leap-year`, `isNaN`, `isnt`, `is-prime`,
    
  195. 		`is-relative`, `is-routine`, `is-setting`, `is-win`, `item`, `iterator`, `join`, `keep`,
    
  196. 		`kept`, `KERNELnames`, `key`, `keyof`, `keys`, `kill`, `kv`, `kxxv`, `l`, `lang`, `last`,
    
  197. 		`lastcall`, `later`, `lazy`, `lc`, `leading`, `level`, `like`, `line`, `lines`, `link`,
    
  198. 		`List`, `list`, `listen`, `live`, `lives-ok`, `load`, `load-repo-id`, `load-unit`, `loaded`,
    
  199. 		`loads`, `local`, `lock`, `log`, `log10`, `lookup`, `lsb`, `made`, `MAIN`, `make`, `Map`,
    
  200. 		`map`, `match`, `max`, `maxpairs`, `merge`, `message`, `method`, `meta`, `method_table`,
    
  201. 		`methods`, `migrate`, `min`, `minmax`, `minpairs`, `minute`, `misplaced`, `Mix`, `mix`,
    
  202. 		`MixHash`, `mixin`, `mixin_attribute`, `Mixy`, `mkdir`, `mode`, `modified`, `month`, `move`,
    
  203. 		`mro`, `msb`, `multi`, `multiness`, `name`, `named`, `named_names`, `narrow`,
    
  204. 		`nativecast`, `native-descriptor`, `nativesizeof`, `need`, `new`, `new_type`,
    
  205. 		`new-from-daycount`, `new-from-pairs`, `next`, `nextcallee`, `next-handle`, `nextsame`,
    
  206. 		`nextwith`, `next-interesting-index`, `NFC`, `NFD`, `NFKC`, `NFKD`, `nice`, `nl-in`,
    
  207. 		`nl-out`, `nodemap`, `nok`, `normalize`, `none`, `norm`, `not`, `note`, `now`, `nude`,
    
  208. 		`Num`, `numerator`, `Numeric`, `of`, `offset`, `offset-in-hours`, `offset-in-minutes`,
    
  209. 		`ok`, `old`, `on-close`, `one`, `on-switch`, `open`, `opened`, `operation`, `optional`,
    
  210. 		`ord`, `ords`, `orig`, `os-error`, `osname`, `out-buffer`, `pack`, `package`, `package-kind`,
    
  211. 		`package-name`, `packages`, `Pair`, `pair`, `pairs`, `pairup`, `parameter`, `params`,
    
  212. 		`parent`, `parent-name`, `parents`, `parse`, `parse-base`, `parsefile`, `parse-names`,
    
  213. 		`parts`, `pass`, `path`, `path-sep`, `payload`, `peer-host`, `peer-port`, `periods`, `perl`,
    
  214. 		`permutations`, `phaser`, `pick`, `pickpairs`, `pid`, `placeholder`, `plan`, `plus`,
    
  215. 		`polar`, `poll`, `polymod`, `pop`, `pos`, `positional`, `posix`, `postfix`, `postmatch`,
    
  216. 		`precomp-ext`, `precomp-target`, `precompiled`, `pred`, `prefix`, `prematch`, `prepend`,
    
  217. 		`primary`, `print`, `printf`, `print-nl`, `print-to`, `private`, `private_method_names`,
    
  218. 		`private_method_table`, `proc`, `produce`, `Promise`, `promise`, `prompt`, `protect`,
    
  219. 		`protect-or-queue-on-recursion`, `publish_method_cache`, `pull-one`, `push`, `push-all`,
    
  220. 		`push-at-least`, `push-exactly`, `push-until-lazy`, `put`, `qualifier-type`, `quaternary`,
    
  221. 		`quit`, `r`, `race`, `radix`, `raku`, `rand`, `Range`, `range`, `Rat`, `raw`, `re`, `read`,
    
  222. 		`read-bits`, `read-int128`, `read-int16`, `read-int32`, `read-int64`, `read-int8`,
    
  223. 		`read-num32`, `read-num64`, `read-ubits`, `read-uint128`, `read-uint16`, `read-uint32`,
    
  224. 		`read-uint64`, `read-uint8`, `readchars`, `readonly`, `ready`, `Real`, `reallocate`,
    
  225. 		`reals`, `reason`, `rebless`, `receive`, `recv`, `redispatcher`, `redo`, `reduce`,
    
  226. 		`rel2abs`, `relative`, `release`, `remove`, `rename`, `repeated`, `replacement`,
    
  227. 		`replace-with`, `repo`, `repo-id`, `report`, `required`, `reserved`, `resolve`, `restore`,
    
  228. 		`result`, `resume`, `rethrow`, `return`, `return-rw`, `returns`, `reverse`, `right`,
    
  229. 		`rindex`, `rmdir`, `role`, `roles_to_compose`, `rolish`, `roll`, `rootdir`, `roots`,
    
  230. 		`rotate`, `rotor`, `round`, `roundrobin`, `routine-type`, `run`, `RUN-MAIN`, `rw`, `rwx`,
    
  231. 		`samecase`, `samemark`, `samewith`, `say`, `schedule-on`, `scheduler`, `scope`, `sec`,
    
  232. 		`sech`, `second`, `secondary`, `seek`, `self`, `send`, `Seq`, `Set`, `set`, `serial`,
    
  233. 		`set_hidden`, `set_name`, `set_package`, `set_rw`, `set_value`, `set_api`, `set_auth`,
    
  234. 		`set_composalizer`, `set_export_callback`, `set_is_mixin`, `set_mixin_attribute`,
    
  235. 		`set_package`, `set_ver`, `set_why`, `SetHash`, `Setty`, `set-instruments`,
    
  236. 		`setup_finalization`, `setup_mixin_cache`, `shape`, `share`, `shell`, `short-id`,
    
  237. 		`short-name`, `shortname`, `shift`, `sibling`, `sigil`, `sign`, `signal`, `signals`,
    
  238. 		`signature`, `sin`, `sinh`, `sink`, `sink-all`, `skip`, `skip-at-least`,
    
  239. 		`skip-at-least-pull-one`, `skip-one`, `skip-rest`, `sleep`, `sleep-timer`, `sleep-until`,
    
  240. 		`Slip`, `slip`, `slurp`, `slurp-rest`, `slurpy`, `snap`, `snapper`, `so`, `socket-host`,
    
  241. 		`socket-port`, `sort`, `source`, `source-package`, `spawn`, `SPEC`, `splice`, `split`,
    
  242. 		`splitdir`, `splitpath`, `sprintf`, `spurt`, `sqrt`, `squish`, `srand`, `stable`, `start`,
    
  243. 		`started`, `starts-with`, `status`, `stderr`, `stdout`, `STORE`, `store-file`,
    
  244. 		`store-repo-id`, `store-unit`, `Str`, `Stringy`, `sub_signature`, `subbuf`, `subbuf-rw`,
    
  245. 		`subname`, `subparse`, `subst`, `subst-mutate`, `substr`, `substr-eq`, `substr-rw`,
    
  246. 		`subtest`, `succ`, `sum`, `suffix`, `summary`, `Supply`, `symlink`, `T`, `t`, `tail`,
    
  247. 		`take`, `take-rw`, `tan`, `tanh`, `tap`, `target`, `target-name`, `tc`, `tclc`, `tell`,
    
  248. 		`term`, `tertiary`, `then`, `throttle`, `throw`, `throws-like`, `time`, `timezone`,
    
  249. 		`tmpdir`, `to`, `today`, `todo`, `toggle`, `to-posix`, `total`, `total-memory`, `trailing`,
    
  250. 		`trans`, `tree`, `trim`, `trim-leading`, `trim-trailing`, `truncate`, `truncated-to`,
    
  251. 		`trusts`, `try_acquire`, `trying`, `twigil`, `type`, `type_captures`, `type_check`,
    
  252. 		`typename`, `uc`, `udp`, `uncaught_handler`, `undefine`, `unimatch`, `unicmp`, `uniname`,
    
  253. 		`uninames`, `uninstall`, `uniparse`, `uniprop`, `uniprops`, `unique`, `unival`, `univals`,
    
  254. 		`unlike`, `unlink`, `unlock`, `unpack`, `unpolar`, `unset`, `unshift`, `unwrap`, `updir`,
    
  255. 		`USAGE`, `usage-name`, `use-ok`, `utc`, `val`, `value`, `values`, `VAR`, `variable`, `ver`,
    
  256. 		`verbose-config`, `Version`, `version`, `VMnames`, `volume`, `vow`, `w`, `wait`, `warn`,
    
  257. 		`watch`, `watch-path`, `week`, `weekday-of-month`, `week-number`, `week-year`, `WHAT`,
    
  258. 		`what`, `when`, `WHERE`, `WHEREFORE`, `WHICH`, `WHO`, `whole-second`, `WHY`, `why`,
    
  259. 		`with-lock-hidden-from-recursion-check`, `wordcase`, `words`, `workaround`, `wrap`,
    
  260. 		`write`, `write-bits`, `write-int128`, `write-int16`, `write-int32`, `write-int64`,
    
  261. 		`write-int8`, `write-num32`, `write-num64`, `write-ubits`, `write-uint128`, `write-uint16`,
    
  262. 		`write-uint32`, `write-uint64`, `write-uint8`, `write-to`, `x`, `yada`, `year`, `yield`,
    
  263. 		`yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`,
    
  264. 	}
    
  265. 
    
  266. 	builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...)
    
  267. 
    
  268. 	// A map of opening and closing brackets
    
  269. 	brackets := map[rune]rune{
    
  270. 		'\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
    
  271. 		'\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
    
  272. 		'\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
    
  273. 		'\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
    
  274. 		'\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
    
  275. 		'\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
    
  276. 		'\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
    
  277. 		'\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
    
  278. 		'\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
    
  279. 		'\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
    
  280. 		'\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
    
  281. 		'\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
    
  282. 		'\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
    
  283. 		'\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
    
  284. 		'\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
    
  285. 		'\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
    
  286. 		'\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
    
  287. 		'\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
    
  288. 		'\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
    
  289. 		'\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
    
  290. 		'\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
    
  291. 		'\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
    
  292. 		'\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
    
  293. 		'\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
    
  294. 		'\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
    
  295. 		'\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
    
  296. 		'\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
    
  297. 		'\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
    
  298. 		'\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
    
  299. 		'\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
    
  300. 		'\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
    
  301. 		'\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
    
  302. 		'\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
    
  303. 		'\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
    
  304. 		'\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
    
  305. 		'\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
    
  306. 		'\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
    
  307. 		'\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
    
  308. 		'\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
    
  309. 		'\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
    
  310. 		'\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
    
  311. 		'\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
    
  312. 		'\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
    
  313. 		'\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
    
  314. 		'\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
    
  315. 		'\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
    
  316. 		'\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
    
  317. 		'\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
    
  318. 		'\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
    
  319. 		'\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
    
  320. 		'\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
    
  321. 		'\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
    
  322. 		'\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
    
  323. 		'\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
    
  324. 		'\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
    
  325. 		'\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
    
  326. 		'\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
    
  327. 		'\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
    
  328. 		'\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
    
  329. 		'\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
    
  330. 		'\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
    
  331. 		'\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
    
  332. 		'\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
    
  333. 		'\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
    
  334. 	}
    
  335. 
    
  336. 	bracketsPattern := `[` + regexp.QuoteMeta(joinRuneMap(brackets)) + `]`
    
  337. 
    
  338. 	// Finds opening brackets and their closing counterparts (including pod and heredoc)
    
  339. 	// and modifies state groups and position accordingly
    
  340. 	findBrackets := func(tokenClass RakuToken) MutatorFunc {
    
  341. 		return func(state *LexerState) error {
    
  342. 			var openingChars []rune
    
  343. 			var adverbs []rune
    
  344. 			switch tokenClass {
    
  345. 			case rakuPod:
    
  346. 				openingChars = []rune(strings.Join(state.Groups[1:5], ``))
    
  347. 			default:
    
  348. 				adverbs = []rune(state.NamedGroups[`adverbs`])
    
  349. 				openingChars = []rune(state.NamedGroups[`opening_delimiters`])
    
  350. 			}
    
  351. 
    
  352. 			openingChar := openingChars[0]
    
  353. 
    
  354. 			nChars := len(openingChars)
    
  355. 
    
  356. 			var closingChar rune
    
  357. 			var closingCharExists bool
    
  358. 			var closingChars []rune
    
  359. 
    
  360. 			switch tokenClass {
    
  361. 			case rakuPod:
    
  362. 				closingCharExists = true
    
  363. 			default:
    
  364. 				closingChar, closingCharExists = brackets[openingChar]
    
  365. 			}
    
  366. 
    
  367. 			switch tokenClass {
    
  368. 			case rakuPodFormatter:
    
  369. 				formatter := StringOther
    
  370. 
    
  371. 				switch state.NamedGroups[`keyword`] {
    
  372. 				case "B":
    
  373. 					formatter = GenericStrong
    
  374. 				case "I":
    
  375. 					formatter = GenericEmph
    
  376. 				case "U":
    
  377. 					formatter = GenericUnderline
    
  378. 				}
    
  379. 
    
  380. 				formatterRule := ruleReplacingConfig{
    
  381. 					pattern:      `.+?`,
    
  382. 					tokenType:    formatter,
    
  383. 					mutator:      nil,
    
  384. 					stateName:    `pod-formatter`,
    
  385. 					rulePosition: bottomRule,
    
  386. 				}
    
  387. 
    
  388. 				err := replaceRule(formatterRule)(state)
    
  389. 				if err != nil {
    
  390. 					panic(err)
    
  391. 				}
    
  392. 
    
  393. 				err = replaceRule(ruleReplacingConfig{
    
  394. 					delimiter:              []rune{closingChar},
    
  395. 					tokenType:              Punctuation,
    
  396. 					stateName:              `pod-formatter`,
    
  397. 					pushState:              true,
    
  398. 					numberOfDelimiterChars: nChars,
    
  399. 					appendMutator:          popRule(formatterRule),
    
  400. 				})(state)
    
  401. 				if err != nil {
    
  402. 					panic(err)
    
  403. 				}
    
  404. 
    
  405. 				return nil
    
  406. 			case rakuMatchRegex:
    
  407. 				var delimiter []rune
    
  408. 				if closingCharExists {
    
  409. 					delimiter = []rune{closingChar}
    
  410. 				} else {
    
  411. 					delimiter = openingChars
    
  412. 				}
    
  413. 
    
  414. 				err := replaceRule(ruleReplacingConfig{
    
  415. 					delimiter: delimiter,
    
  416. 					tokenType: Punctuation,
    
  417. 					stateName: `regex`,
    
  418. 					popState:  true,
    
  419. 					pushState: true,
    
  420. 				})(state)
    
  421. 				if err != nil {
    
  422. 					panic(err)
    
  423. 				}
    
  424. 
    
  425. 				return nil
    
  426. 			case rakuSubstitutionRegex:
    
  427. 				delimiter := regexp2.Escape(string(openingChars))
    
  428. 
    
  429. 				err := replaceRule(ruleReplacingConfig{
    
  430. 					pattern:      `(` + delimiter + `)` + `((?:\\\\|\\/|.)*?)` + `(` + delimiter + `)`,
    
  431. 					tokenType:    ByGroups(Punctuation, UsingSelf(`qq`), Punctuation),
    
  432. 					rulePosition: topRule,
    
  433. 					stateName:    `regex`,
    
  434. 					popState:     true,
    
  435. 					pushState:    true,
    
  436. 				})(state)
    
  437. 				if err != nil {
    
  438. 					panic(err)
    
  439. 				}
    
  440. 
    
  441. 				return nil
    
  442. 			}
    
  443. 
    
  444. 			text := state.Text
    
  445. 
    
  446. 			var endPos int
    
  447. 
    
  448. 			var nonMirroredOpeningCharPosition int
    
  449. 
    
  450. 			if !closingCharExists {
    
  451. 				// it's not a mirrored character, which means we
    
  452. 				// just need to look for the next occurrence
    
  453. 				closingChars = openingChars
    
  454. 				nonMirroredOpeningCharPosition = indexAt(text, closingChars, state.Pos)
    
  455. 				endPos = nonMirroredOpeningCharPosition
    
  456. 			} else {
    
  457. 				var podRegex *regexp2.Regexp
    
  458. 				if tokenClass == rakuPod {
    
  459. 					podRegex = regexp2.MustCompile(
    
  460. 						state.NamedGroups[`ws`]+`=end`+`\s+`+regexp2.Escape(state.NamedGroups[`name`]),
    
  461. 						0,
    
  462. 					)
    
  463. 				} else {
    
  464. 					closingChars = []rune(strings.Repeat(string(closingChar), nChars))
    
  465. 				}
    
  466. 
    
  467. 				// we need to look for the corresponding closing character,
    
  468. 				// keep nesting in mind
    
  469. 				nestingLevel := 1
    
  470. 
    
  471. 				searchPos := state.Pos - nChars
    
  472. 
    
  473. 				var nextClosePos int
    
  474. 
    
  475. 				for nestingLevel > 0 {
    
  476. 					if tokenClass == rakuPod {
    
  477. 						match, err := podRegex.FindRunesMatchStartingAt(text, searchPos+nChars)
    
  478. 						if err == nil {
    
  479. 							closingChars = match.Runes()
    
  480. 							nextClosePos = match.Index
    
  481. 						} else {
    
  482. 							nextClosePos = -1
    
  483. 						}
    
  484. 					} else {
    
  485. 						nextClosePos = indexAt(text, closingChars, searchPos+nChars)
    
  486. 					}
    
  487. 
    
  488. 					nextOpenPos := indexAt(text, openingChars, searchPos+nChars)
    
  489. 
    
  490. 					switch {
    
  491. 					case nextClosePos == -1:
    
  492. 						nextClosePos = len(text)
    
  493. 						nestingLevel = 0
    
  494. 					case nextOpenPos != -1 && nextOpenPos < nextClosePos:
    
  495. 						nestingLevel++
    
  496. 						nChars = len(openingChars)
    
  497. 						searchPos = nextOpenPos
    
  498. 					default: // next_close_pos < next_open_pos
    
  499. 						nestingLevel--
    
  500. 						nChars = len(closingChars)
    
  501. 						searchPos = nextClosePos
    
  502. 					}
    
  503. 				}
    
  504. 
    
  505. 				endPos = nextClosePos
    
  506. 			}
    
  507. 
    
  508. 			if endPos < 0 {
    
  509. 				// if we didn't find a closer, just highlight the
    
  510. 				// rest of the text in this class
    
  511. 				endPos = len(text)
    
  512. 			}
    
  513. 
    
  514. 			adverbre := regexp.MustCompile(`:to\b|:heredoc\b`)
    
  515. 			var heredocTerminator []rune
    
  516. 			var endHeredocPos int
    
  517. 			if adverbre.MatchString(string(adverbs)) {
    
  518. 				if endPos != len(text) {
    
  519. 					heredocTerminator = text[state.Pos:endPos]
    
  520. 					nChars = len(heredocTerminator)
    
  521. 				} else {
    
  522. 					endPos = state.Pos + 1
    
  523. 					heredocTerminator = []rune{}
    
  524. 					nChars = 0
    
  525. 				}
    
  526. 
    
  527. 				if nChars > 0 {
    
  528. 					endHeredocPos = indexAt(text[endPos:], heredocTerminator, 0)
    
  529. 					if endHeredocPos > -1 {
    
  530. 						endPos += endHeredocPos
    
  531. 					} else {
    
  532. 						endPos = len(text)
    
  533. 					}
    
  534. 				}
    
  535. 			}
    
  536. 
    
  537. 			textBetweenBrackets := string(text[state.Pos:endPos])
    
  538. 			switch tokenClass {
    
  539. 			case rakuPod, rakuPodDeclaration, rakuNameAttribute:
    
  540. 				state.NamedGroups[`value`] = textBetweenBrackets
    
  541. 				state.NamedGroups[`closing_delimiters`] = string(closingChars)
    
  542. 			case rakuQuote:
    
  543. 				if len(heredocTerminator) > 0 {
    
  544. 					// Length of heredoc terminator + closing chars + `;`
    
  545. 					heredocFristPunctuationLen := nChars + len(openingChars) + 1
    
  546. 
    
  547. 					state.NamedGroups[`opening_delimiters`] = string(openingChars) +
    
  548. 						string(text[state.Pos:state.Pos+heredocFristPunctuationLen])
    
  549. 
    
  550. 					state.NamedGroups[`value`] =
    
  551. 						string(text[state.Pos+heredocFristPunctuationLen : endPos])
    
  552. 
    
  553. 					if endHeredocPos > -1 {
    
  554. 						state.NamedGroups[`closing_delimiters`] = string(heredocTerminator)
    
  555. 					}
    
  556. 				} else {
    
  557. 					state.NamedGroups[`value`] = textBetweenBrackets
    
  558. 					if nChars > 0 {
    
  559. 						state.NamedGroups[`closing_delimiters`] = string(closingChars)
    
  560. 					}
    
  561. 				}
    
  562. 			default:
    
  563. 				state.Groups = []string{state.Groups[0] + string(text[state.Pos:endPos+nChars])}
    
  564. 			}
    
  565. 
    
  566. 			state.Pos = endPos + nChars
    
  567. 
    
  568. 			return nil
    
  569. 		}
    
  570. 	}
    
  571. 
    
  572. 	// Raku rules
    
  573. 	// Empty capture groups are placeholders and will be replaced by mutators
    
  574. 	// DO NOT REMOVE THEM!
    
  575. 	return Rules{
    
  576. 		"root": {
    
  577. 			// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
    
  578. 			{`\A\z`, nil, nil},
    
  579. 			Include("common"),
    
  580. 			{`{`, Punctuation, Push(`root`)},
    
  581. 			{`\(`, Punctuation, Push(`root`)},
    
  582. 			{`[)}]`, Punctuation, Pop(1)},
    
  583. 			{`;`, Punctuation, nil},
    
  584. 			{`\[|\]`, Operator, nil},
    
  585. 			{`.+?`, Text, nil},
    
  586. 		},
    
  587. 		"common": {
    
  588. 			{`^#![^\n]*$`, CommentHashbang, nil},
    
  589. 			Include("pod"),
    
  590. 			// Multi-line, Embedded comment
    
  591. 			{
    
  592. 				"#`(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)`,
    
  593. 				CommentMultiline,
    
  594. 				findBrackets(rakuMultilineComment),
    
  595. 			},
    
  596. 			{`#[^\n]*$`, CommentSingle, nil},
    
  597. 			// /regex/
    
  598. 			{
    
  599. 				`(?<=(?:^|\(|=|:|~~|\[|{|,|=>)\s*)(/)(?!\]|\))((?:\\\\|\\/|.)*?)((?<!(?<!\\)\\)/(?!'|"))`,
    
  600. 				ByGroups(Punctuation, UsingSelf("regex"), Punctuation),
    
  601. 				nil,
    
  602. 			},
    
  603. 			Include("variable"),
    
  604. 			// ::?VARIABLE
    
  605. 			{`::\?\w+(?::[_UD])?`, NameVariableGlobal, nil},
    
  606. 			// Version
    
  607. 			{
    
  608. 				`\b(v)(\d+)((?:\.(?:\*|[\d\w]+))*)(\+)?`,
    
  609. 				ByGroups(Keyword, NumberInteger, NameEntity, Operator),
    
  610. 				nil,
    
  611. 			},
    
  612. 			Include("number"),
    
  613. 			// Hyperoperator | »*«
    
  614. 			{`(>>)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
    
  615. 			{`(»)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
    
  616. 			// Hyperoperator | «*«
    
  617. 			{`(<<)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
    
  618. 			{`(«)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
    
  619. 			// Hyperoperator | »*»
    
  620. 			{`(>>)(\S+?)(>>)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
    
  621. 			{`(»)(\S+?)(»)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
    
  622. 			// <<quoted words>>
    
  623. 			{`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<<)(?!(?:(?!>>)[^\n])+?[},;] *\n)(?!(?:(?!>>).)+?>>\S+?>>)`, Punctuation, Push("<<")},
    
  624. 			// «quoted words»
    
  625. 			{`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(«)(?![^»]+?[},;] *\n)(?![^»]+?»\S+?»)`, Punctuation, Push("«")},
    
  626. 			// [<]
    
  627. 			{`(?<=\[\\?)<(?=\])`, Operator, nil},
    
  628. 			// < and > operators | something < onething > something
    
  629. 			{
    
  630. 				`(?<=[$@%&]?\w[\w':-]* +)(<=?)( *[^ ]+? *)(>=?)(?= *[$@%&]?\w[\w':-]*)`,
    
  631. 				ByGroups(Operator, UsingSelf("root"), Operator),
    
  632. 				nil,
    
  633. 			},
    
  634. 			// <quoted words>
    
  635. 			{
    
  636. 				`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<)((?:(?![,;)}] *(?:#[^\n]+)?\n)[^<>])+?)(>)(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?\w[\w':-]*[^(]|\s+\[))`,
    
  637. 				ByGroups(Punctuation, String, Punctuation),
    
  638. 				nil,
    
  639. 			},
    
  640. 			{`C?X::['\w:-]+`, NameException, nil},
    
  641. 			Include("metaoperator"),
    
  642. 			// Pair | key => value
    
  643. 			{
    
  644. 				`(\w[\w'-]*)(\s*)(=>)`,
    
  645. 				ByGroups(String, Text, Operator),
    
  646. 				nil,
    
  647. 			},
    
  648. 			Include("colon-pair"),
    
  649. 			// Token
    
  650. 			{
    
  651. 				`(?<=(?:^|\s)(?:regex|token|rule)(\s+))` + namePattern + colonPairLookahead + `\s*[({])`,
    
  652. 				NameFunction,
    
  653. 				Push("token", "name-adverb"),
    
  654. 			},
    
  655. 			// Substitution
    
  656. 			{`(?<=^|\b|\s)(?<!\.)(ss|S|s|TR|tr)\b(\s*)`, ByGroups(Keyword, Text), Push("substitution")},
    
  657. 			{keywordsPattern, Keyword, nil},
    
  658. 			{builtinTypesPattern, KeywordType, nil},
    
  659. 			{builtinRoutinesPattern, NameBuiltin, nil},
    
  660. 			// Class name
    
  661. 			{
    
  662. 				`(?<=(?:^|\s)(?:class|grammar|role|does|but|is|subset|of)\s+)` + namePattern,
    
  663. 				NameClass,
    
  664. 				Push("name-adverb"),
    
  665. 			},
    
  666. 			//  Routine
    
  667. 			{
    
  668. 				`(?<=(?:^|\s)(?:sub|method|multi sub|multi)\s+)!?` + namePattern + colonPairLookahead + `\s*[({])`,
    
  669. 				NameFunction,
    
  670. 				Push("name-adverb"),
    
  671. 			},
    
  672. 			// Constant
    
  673. 			{`(?<=\bconstant\s+)` + namePattern, NameConstant, Push("name-adverb")},
    
  674. 			// Namespace
    
  675. 			{`(?<=\b(?:use|module|package)\s+)` + namePattern, NameNamespace, Push("name-adverb")},
    
  676. 			Include("operator"),
    
  677. 			Include("single-quote"),
    
  678. 			{`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
    
  679. 			// m,rx regex
    
  680. 			{`(?<=^|\b|\s)(ms|m|rx)\b(\s*)`, ByGroups(Keyword, Text), Push("rx")},
    
  681. 			// Quote constructs
    
  682. 			{
    
  683. 				`(?<=^|\b|\s)(?<keyword>(?:qq|q|Q))(?<adverbs>(?::?(?:heredoc|to|qq|ww|q|w|s|a|h|f|c|b|to|v|x))*)(?<ws>\s*)(?<opening_delimiters>(?<delimiter>[^0-9a-zA-Z:\s])\k<delimiter>*)`,
    
  684. 				EmitterFunc(quote),
    
  685. 				findBrackets(rakuQuote),
    
  686. 			},
    
  687. 			// Function
    
  688. 			{
    
  689. 				`\b` + namePattern + colonPairLookahead + `\()`,
    
  690. 				NameFunction,
    
  691. 				Push("name-adverb"),
    
  692. 			},
    
  693. 			// Method
    
  694. 			{
    
  695. 				`(?<!\.\.[?^*+]?)(?<=(?:\.[?^*+&]?)|self!)` + namePattern + colonPairLookahead + `\b)`,
    
  696. 				NameFunction,
    
  697. 				Push("name-adverb"),
    
  698. 			},
    
  699. 			// Indirect invocant
    
  700. 			{namePattern + `(?=\s+\W?['\w:-]+:\W)`, NameFunction, Push("name-adverb")},
    
  701. 			{`(?<=\W)(?:∅|i|e|𝑒|tau|τ|pi|π|Inf|∞)(?=\W)`, NameConstant, nil},
    
  702. 			{`(「)([^」]*)(」)`, ByGroups(Punctuation, String, Punctuation), nil},
    
  703. 			{`(?<=^ *)\b` + namePattern + `(?=:\s*(?:for|while|loop))`, NameLabel, nil},
    
  704. 			// Sigilless variable
    
  705. 			{
    
  706. 				`(?<=\b(?:my|our|constant|let|temp)\s+)\\` + namePattern,
    
  707. 				NameVariable,
    
  708. 				Push("name-adverb"),
    
  709. 			},
    
  710. 			{namePattern, Name, Push("name-adverb")},
    
  711. 		},
    
  712. 		"rx": {
    
  713. 			Include("colon-pair-attribute"),
    
  714. 			{
    
  715. 				`(?<opening_delimiters>(?<delimiter>[^\w:\s])\k<delimiter>*)`,
    
  716. 				ByGroupNames(
    
  717. 					map[string]Emitter{
    
  718. 						`opening_delimiters`: Punctuation,
    
  719. 						`delimiter`:          nil,
    
  720. 					},
    
  721. 				),
    
  722. 				findBrackets(rakuMatchRegex),
    
  723. 			},
    
  724. 		},
    
  725. 		"substitution": {
    
  726. 			Include("colon-pair-attribute"),
    
  727. 			// Substitution | s{regex} = value
    
  728. 			{
    
  729. 				`(?<opening_delimiters>(?<delimiter>` + bracketsPattern + `)\k<delimiter>*)`,
    
  730. 				ByGroupNames(map[string]Emitter{
    
  731. 					`opening_delimiters`: Punctuation,
    
  732. 					`delimiter`:          nil,
    
  733. 				}),
    
  734. 				findBrackets(rakuMatchRegex),
    
  735. 			},
    
  736. 			// Substitution | s/regex/string/
    
  737. 			{
    
  738. 				`(?<opening_delimiters>[^\w:\s])`,
    
  739. 				Punctuation,
    
  740. 				findBrackets(rakuSubstitutionRegex),
    
  741. 			},
    
  742. 		},
    
  743. 		"number": {
    
  744. 			{`0_?[0-7]+(_[0-7]+)*`, LiteralNumberOct, nil},
    
  745. 			{`0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*`, LiteralNumberHex, nil},
    
  746. 			{`0b[01]+(_[01]+)*`, LiteralNumberBin, nil},
    
  747. 			{
    
  748. 				`(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?`,
    
  749. 				LiteralNumberFloat,
    
  750. 				nil,
    
  751. 			},
    
  752. 			{`(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*`, LiteralNumberFloat, nil},
    
  753. 			{`(?<=\d+)i`, NameConstant, nil},
    
  754. 			{`\d+(_\d+)*`, LiteralNumberInteger, nil},
    
  755. 		},
    
  756. 		"name-adverb": {
    
  757. 			Include("colon-pair-attribute-keyvalue"),
    
  758. 			Default(Pop(1)),
    
  759. 		},
    
  760. 		"colon-pair": {
    
  761. 			// :key(value)
    
  762. 			{colonPairPattern, colonPair(String), findBrackets(rakuNameAttribute)},
    
  763. 			// :123abc
    
  764. 			{
    
  765. 				`(:)(\d+)(\w[\w'-]*)`,
    
  766. 				ByGroups(Punctuation, UsingSelf("number"), String),
    
  767. 				nil,
    
  768. 			},
    
  769. 			// :key
    
  770. 			{`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, String), nil},
    
  771. 			{`\s+`, Text, nil},
    
  772. 		},
    
  773. 		"colon-pair-attribute": {
    
  774. 			// :key(value)
    
  775. 			{colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
    
  776. 			// :123abc
    
  777. 			{
    
  778. 				`(:)(\d+)(\w[\w'-]*)`,
    
  779. 				ByGroups(Punctuation, UsingSelf("number"), NameAttribute),
    
  780. 				nil,
    
  781. 			},
    
  782. 			// :key
    
  783. 			{`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, NameAttribute), nil},
    
  784. 			{`\s+`, Text, nil},
    
  785. 		},
    
  786. 		"colon-pair-attribute-keyvalue": {
    
  787. 			// :key(value)
    
  788. 			{colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
    
  789. 		},
    
  790. 		"escape-qq": {
    
  791. 			{
    
  792. 				`(?<!(?<!\\)\\)(\\qq)(\[)(.+?)(\])`,
    
  793. 				ByGroups(StringEscape, Punctuation, UsingSelf("qq"), Punctuation),
    
  794. 				nil,
    
  795. 			},
    
  796. 		},
    
  797. 		`escape-char`: {
    
  798. 			{`(?<!(?<!\\)\\)(\\[abfrnrt])`, StringEscape, nil},
    
  799. 		},
    
  800. 		`escape-single-quote`: {
    
  801. 			{`(?<!(?<!\\)\\)(\\)(['\\])`, ByGroups(StringEscape, StringSingle), nil},
    
  802. 		},
    
  803. 		"escape-c-name": {
    
  804. 			{
    
  805. 				`(?<!(?<!\\)\\)(\\[c|C])(\[)(.+?)(\])`,
    
  806. 				ByGroups(StringEscape, Punctuation, String, Punctuation),
    
  807. 				nil,
    
  808. 			},
    
  809. 		},
    
  810. 		"escape-hexadecimal": {
    
  811. 			{
    
  812. 				`(?<!(?<!\\)\\)(\\[x|X])(\[)([0-9a-fA-F]+)(\])`,
    
  813. 				ByGroups(StringEscape, Punctuation, NumberHex, Punctuation),
    
  814. 				nil,
    
  815. 			},
    
  816. 			{`(\\[x|X])([0-9a-fA-F]+)`, ByGroups(StringEscape, NumberHex), nil},
    
  817. 		},
    
  818. 		"regex": {
    
  819. 			// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
    
  820. 			{`\A\z`, nil, nil},
    
  821. 			Include("regex-escape-class"),
    
  822. 			Include(`regex-character-escape`),
    
  823. 			// $(code)
    
  824. 			{
    
  825. 				`([$@])((?<!(?<!\\)\\)\()`,
    
  826. 				ByGroups(Keyword, Punctuation),
    
  827. 				replaceRule(ruleReplacingConfig{
    
  828. 					delimiter: []rune(`)`),
    
  829. 					tokenType: Punctuation,
    
  830. 					stateName: `root`,
    
  831. 					pushState: true,
    
  832. 				}),
    
  833. 			},
    
  834. 			// Exclude $/ from variables, because we can't get out of the end of the slash regex: $/;
    
  835. 			{`\$(?=/)`, NameEntity, nil},
    
  836. 			// Exclude $ from variables
    
  837. 			{`\$(?=\z|\s|[^<(\w*!.])`, NameEntity, nil},
    
  838. 			Include("variable"),
    
  839. 			Include("escape-c-name"),
    
  840. 			Include("escape-hexadecimal"),
    
  841. 			Include("number"),
    
  842. 			Include("single-quote"),
    
  843. 			// :my variable code ...
    
  844. 			{
    
  845. 				`(?<!(?<!\\)\\)(:)(my|our|state|constant|temp|let)`,
    
  846. 				ByGroups(Operator, KeywordDeclaration),
    
  847. 				replaceRule(ruleReplacingConfig{
    
  848. 					delimiter: []rune(`;`),
    
  849. 					tokenType: Punctuation,
    
  850. 					stateName: `root`,
    
  851. 					pushState: true,
    
  852. 				}),
    
  853. 			},
    
  854. 			// <{code}>
    
  855. 			{
    
  856. 				`(?<!(?<!\\)\\)(<)([?!.]*)((?<!(?<!\\)\\){)`,
    
  857. 				ByGroups(Punctuation, Operator, Punctuation),
    
  858. 				replaceRule(ruleReplacingConfig{
    
  859. 					delimiter: []rune(`}>`),
    
  860. 					tokenType: Punctuation,
    
  861. 					stateName: `root`,
    
  862. 					pushState: true,
    
  863. 				}),
    
  864. 			},
    
  865. 			// {code}
    
  866. 			Include(`closure`),
    
  867. 			// Properties
    
  868. 			{`(:)(\w+)`, ByGroups(Punctuation, NameAttribute), nil},
    
  869. 			// Operator
    
  870. 			{`\|\||\||&&|&|\.\.|\*\*|%%|%|:|!|<<|«|>>|»|\+|\*\*|\*|\?|=|~|<~~>`, Operator, nil},
    
  871. 			// Anchors
    
  872. 			{`\^\^|\^|\$\$|\$`, NameEntity, nil},
    
  873. 			{`\.`, NameEntity, nil},
    
  874. 			{`#[^\n]*\n`, CommentSingle, nil},
    
  875. 			// Lookaround
    
  876. 			{
    
  877. 				`(?<!(?<!\\)\\)(<)(\s*)([?!.]+)(\s*)(after|before)`,
    
  878. 				ByGroups(Punctuation, Text, Operator, Text, OperatorWord),
    
  879. 				replaceRule(ruleReplacingConfig{
    
  880. 					delimiter: []rune(`>`),
    
  881. 					tokenType: Punctuation,
    
  882. 					stateName: `regex`,
    
  883. 					pushState: true,
    
  884. 				}),
    
  885. 			},
    
  886. 			{
    
  887. 				`(?<!(?<!\\)\\)(<)([|!?.]*)(wb|ww|ws|w)(>)`,
    
  888. 				ByGroups(Punctuation, Operator, OperatorWord, Punctuation),
    
  889. 				nil,
    
  890. 			},
    
  891. 			// <$variable>
    
  892. 			{
    
  893. 				`(?<!(?<!\\)\\)(<)([?!.]*)([$@]\w[\w:-]*)(>)`,
    
  894. 				ByGroups(Punctuation, Operator, NameVariable, Punctuation),
    
  895. 				nil,
    
  896. 			},
    
  897. 			// Capture markers
    
  898. 			{`(?<!(?<!\\)\\)<\(|\)>`, Operator, nil},
    
  899. 			{
    
  900. 				`(?<!(?<!\\)\\)(<)(\w[\w:-]*)(=\.?)`,
    
  901. 				ByGroups(Punctuation, NameVariable, Operator),
    
  902. 				Push(`regex-variable`),
    
  903. 			},
    
  904. 			{
    
  905. 				`(?<!(?<!\\)\\)(<)([|!?.&]*)(\w(?:(?!:\s)[\w':-])*)`,
    
  906. 				ByGroups(Punctuation, Operator, NameFunction),
    
  907. 				Push(`regex-function`),
    
  908. 			},
    
  909. 			{`(?<!(?<!\\)\\)<`, Punctuation, Push("regex-property")},
    
  910. 			{`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
    
  911. 			{`(?<!(?<!\\)\\)(?:\]|\))`, Punctuation, Pop(1)},
    
  912. 			{`(?<!(?<!\\)\\)(?:\[|\()`, Punctuation, Push("regex")},
    
  913. 			{`.+?`, StringRegex, nil},
    
  914. 		},
    
  915. 		"regex-class-builtin": {
    
  916. 			{
    
  917. 				`\b(?:alnum|alpha|blank|cntrl|digit|graph|lower|print|punct|space|upper|xdigit|same|ident)\b`,
    
  918. 				NameBuiltin,
    
  919. 				nil,
    
  920. 			},
    
  921. 		},
    
  922. 		"regex-function": {
    
  923. 			// <function>
    
  924. 			{`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
    
  925. 			// <function(parameter)>
    
  926. 			{
    
  927. 				`\(`,
    
  928. 				Punctuation,
    
  929. 				replaceRule(ruleReplacingConfig{
    
  930. 					delimiter: []rune(`)>`),
    
  931. 					tokenType: Punctuation,
    
  932. 					stateName: `root`,
    
  933. 					popState:  true,
    
  934. 					pushState: true,
    
  935. 				}),
    
  936. 			},
    
  937. 			// <function value>
    
  938. 			{
    
  939. 				`\s+`,
    
  940. 				StringRegex,
    
  941. 				replaceRule(ruleReplacingConfig{
    
  942. 					delimiter: []rune(`>`),
    
  943. 					tokenType: Punctuation,
    
  944. 					stateName: `regex`,
    
  945. 					popState:  true,
    
  946. 					pushState: true,
    
  947. 				}),
    
  948. 			},
    
  949. 			// <function: value>
    
  950. 			{
    
  951. 				`:`,
    
  952. 				Punctuation,
    
  953. 				replaceRule(ruleReplacingConfig{
    
  954. 					delimiter: []rune(`>`),
    
  955. 					tokenType: Punctuation,
    
  956. 					stateName: `root`,
    
  957. 					popState:  true,
    
  958. 					pushState: true,
    
  959. 				}),
    
  960. 			},
    
  961. 		},
    
  962. 		"regex-variable": {
    
  963. 			Include(`regex-starting-operators`),
    
  964. 			// <var=function(
    
  965. 			{
    
  966. 				`(&)?(\w(?:(?!:\s)[\w':-])*)(?=\()`,
    
  967. 				ByGroups(Operator, NameFunction),
    
  968. 				Mutators(Pop(1), Push(`regex-function`)),
    
  969. 			},
    
  970. 			// <var=function>
    
  971. 			{`(&)?(\w[\w':-]*)(>)`, ByGroups(Operator, NameFunction, Punctuation), Pop(1)},
    
  972. 			// <var=
    
  973. 			Default(Pop(1), Push(`regex-property`)),
    
  974. 		},
    
  975. 		"regex-property": {
    
  976. 			{`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
    
  977. 			Include("regex-class-builtin"),
    
  978. 			Include("variable"),
    
  979. 			Include(`regex-starting-operators`),
    
  980. 			Include("colon-pair-attribute"),
    
  981. 			{`(?<!(?<!\\)\\)\[`, Punctuation, Push("regex-character-class")},
    
  982. 			{`\+|\-`, Operator, nil},
    
  983. 			{`@[\w':-]+`, NameVariable, nil},
    
  984. 			{`.+?`, StringRegex, nil},
    
  985. 		},
    
  986. 		`regex-starting-operators`: {
    
  987. 			{`(?<=<)[|!?.]+`, Operator, nil},
    
  988. 		},
    
  989. 		"regex-escape-class": {
    
  990. 			{`(?i)\\n|\\t|\\h|\\v|\\s|\\d|\\w`, StringEscape, nil},
    
  991. 		},
    
  992. 		`regex-character-escape`: {
    
  993. 			{`(?<!(?<!\\)\\)(\\)(.)`, ByGroups(StringEscape, StringRegex), nil},
    
  994. 		},
    
  995. 		"regex-character-class": {
    
  996. 			{`(?<!(?<!\\)\\)\]`, Punctuation, Pop(1)},
    
  997. 			Include("regex-escape-class"),
    
  998. 			Include("escape-c-name"),
    
  999. 			Include("escape-hexadecimal"),
    
  1000. 			Include(`regex-character-escape`),
    
  1001. 			Include("number"),
    
  1002. 			{`\.\.`, Operator, nil},
    
  1003. 			{`.+?`, StringRegex, nil},
    
  1004. 		},
    
  1005. 		"metaoperator": {
    
  1006. 			// Z[=>]
    
  1007. 			{
    
  1008. 				`\b([RZX]+)\b(\[)([^\s\]]+?)(\])`,
    
  1009. 				ByGroups(OperatorWord, Punctuation, UsingSelf("root"), Punctuation),
    
  1010. 				nil,
    
  1011. 			},
    
  1012. 			// Z=>
    
  1013. 			{`\b([RZX]+)\b([^\s\]]+)`, ByGroups(OperatorWord, UsingSelf("operator")), nil},
    
  1014. 		},
    
  1015. 		"operator": {
    
  1016. 			// Word Operator
    
  1017. 			{wordOperatorsPattern, OperatorWord, nil},
    
  1018. 			// Operator
    
  1019. 			{operatorsPattern, Operator, nil},
    
  1020. 		},
    
  1021. 		"pod": {
    
  1022. 			// Single-line pod declaration
    
  1023. 			{`(#[|=])\s`, Keyword, Push("pod-single")},
    
  1024. 			// Multi-line pod declaration
    
  1025. 			{
    
  1026. 				"(?<keyword>#[|=])(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)(?<value>)(?<closing_delimiters>)`,
    
  1027. 				ByGroupNames(
    
  1028. 					map[string]Emitter{
    
  1029. 						`keyword`:            Keyword,
    
  1030. 						`opening_delimiters`: Punctuation,
    
  1031. 						`delimiter`:          nil,
    
  1032. 						`value`:              UsingSelf("pod-declaration"),
    
  1033. 						`closing_delimiters`: Punctuation,
    
  1034. 					}),
    
  1035. 				findBrackets(rakuPodDeclaration),
    
  1036. 			},
    
  1037. 			Include("pod-blocks"),
    
  1038. 		},
    
  1039. 		"pod-blocks": {
    
  1040. 			// =begin code
    
  1041. 			{
    
  1042. 				`(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?<name>code)(?<config>[^\n]*)(?<value>.*?)(?<ws3>^\k<ws>)(?<end_keyword>=end)(?<ws4> +)\k<name>`,
    
  1043. 				EmitterFunc(podCode),
    
  1044. 				nil,
    
  1045. 			},
    
  1046. 			// =begin
    
  1047. 			{
    
  1048. 				`(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?!code)(?<name>\w[\w'-]*)(?<config>[^\n]*)(?<value>)(?<closing_delimiters>)`,
    
  1049. 				ByGroupNames(
    
  1050. 					map[string]Emitter{
    
  1051. 						`ws`:                 Comment,
    
  1052. 						`keyword`:            Keyword,
    
  1053. 						`ws2`:                StringDoc,
    
  1054. 						`name`:               Keyword,
    
  1055. 						`config`:             EmitterFunc(podConfig),
    
  1056. 						`value`:              UsingSelf("pod-begin"),
    
  1057. 						`closing_delimiters`: Keyword,
    
  1058. 					}),
    
  1059. 				findBrackets(rakuPod),
    
  1060. 			},
    
  1061. 			// =for ...
    
  1062. 			{
    
  1063. 				`(?<=^ *)(?<ws> *)(?<keyword>=(?:for|defn))(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
    
  1064. 				ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
    
  1065. 				Push("pod-paragraph"),
    
  1066. 			},
    
  1067. 			// =config
    
  1068. 			{
    
  1069. 				`(?<=^ *)(?<ws> *)(?<keyword>=config)(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
    
  1070. 				ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
    
  1071. 				nil,
    
  1072. 			},
    
  1073. 			// =alias
    
  1074. 			{
    
  1075. 				`(?<=^ *)(?<ws> *)(?<keyword>=alias)(?<ws2> +)(?<name>\w[\w'-]*)(?<value>[^\n]*\n)`,
    
  1076. 				ByGroups(Comment, Keyword, StringDoc, Keyword, StringDoc),
    
  1077. 				nil,
    
  1078. 			},
    
  1079. 			// =encoding
    
  1080. 			{
    
  1081. 				`(?<=^ *)(?<ws> *)(?<keyword>=encoding)(?<ws2> +)(?<name>[^\n]+)`,
    
  1082. 				ByGroups(Comment, Keyword, StringDoc, Name),
    
  1083. 				nil,
    
  1084. 			},
    
  1085. 			// =para ...
    
  1086. 			{
    
  1087. 				`(?<=^ *)(?<ws> *)(?<keyword>=(?:para|table|pod))(?<config>(?<!\n\s*)[^\n]*\n)`,
    
  1088. 				ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
    
  1089. 				Push("pod-paragraph"),
    
  1090. 			},
    
  1091. 			// =head1 ...
    
  1092. 			{
    
  1093. 				`(?<=^ *)(?<ws> *)(?<keyword>=head\d+)(?<ws2> *)(?<config>#?)`,
    
  1094. 				ByGroups(Comment, Keyword, GenericHeading, Keyword),
    
  1095. 				Push("pod-heading"),
    
  1096. 			},
    
  1097. 			// =item ...
    
  1098. 			{
    
  1099. 				`(?<=^ *)(?<ws> *)(?<keyword>=(?:item\d*|comment|data|[A-Z]+))(?<ws2> *)(?<config>#?)`,
    
  1100. 				ByGroups(Comment, Keyword, StringDoc, Keyword),
    
  1101. 				Push("pod-paragraph"),
    
  1102. 			},
    
  1103. 			{
    
  1104. 				`(?<=^ *)(?<ws> *)(?<keyword>=finish)(?<config>[^\n]*)`,
    
  1105. 				ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
    
  1106. 				Push("pod-finish"),
    
  1107. 			},
    
  1108. 			// ={custom} ...
    
  1109. 			{
    
  1110. 				`(?<=^ *)(?<ws> *)(?<name>=\w[\w'-]*)(?<ws2> *)(?<config>#?)`,
    
  1111. 				ByGroups(Comment, Name, StringDoc, Keyword),
    
  1112. 				Push("pod-paragraph"),
    
  1113. 			},
    
  1114. 			// = podconfig
    
  1115. 			{
    
  1116. 				`(?<=^ *)(?<keyword> *=)(?<ws> *)(?<config>(?::\w[\w'-]*(?:` + colonPairOpeningBrackets + `.+?` +
    
  1117. 					colonPairClosingBrackets + `) *)*\n)`,
    
  1118. 				ByGroups(Keyword, StringDoc, EmitterFunc(podConfig)),
    
  1119. 				nil,
    
  1120. 			},
    
  1121. 		},
    
  1122. 		"pod-begin": {
    
  1123. 			Include("pod-blocks"),
    
  1124. 			Include("pre-pod-formatter"),
    
  1125. 			{`.+?`, StringDoc, nil},
    
  1126. 		},
    
  1127. 		"pod-declaration": {
    
  1128. 			Include("pre-pod-formatter"),
    
  1129. 			{`.+?`, StringDoc, nil},
    
  1130. 		},
    
  1131. 		"pod-paragraph": {
    
  1132. 			{`\n *\n|\n(?=^ *=)`, StringDoc, Pop(1)},
    
  1133. 			Include("pre-pod-formatter"),
    
  1134. 			{`.+?`, StringDoc, nil},
    
  1135. 		},
    
  1136. 		"pod-single": {
    
  1137. 			{`\n`, StringDoc, Pop(1)},
    
  1138. 			Include("pre-pod-formatter"),
    
  1139. 			{`.+?`, StringDoc, nil},
    
  1140. 		},
    
  1141. 		"pod-heading": {
    
  1142. 			{`\n *\n|\n(?=^ *=)`, GenericHeading, Pop(1)},
    
  1143. 			Include("pre-pod-formatter"),
    
  1144. 			{`.+?`, GenericHeading, nil},
    
  1145. 		},
    
  1146. 		"pod-finish": {
    
  1147. 			{`\z`, nil, Pop(1)},
    
  1148. 			Include("pre-pod-formatter"),
    
  1149. 			{`.+?`, StringDoc, nil},
    
  1150. 		},
    
  1151. 		"pre-pod-formatter": {
    
  1152. 			// C<code>, B<bold>, ...
    
  1153. 			{
    
  1154. 				`(?<keyword>[CBIUDTKRPAELZVMSXN])(?<opening_delimiters><+|«)`,
    
  1155. 				ByGroups(Keyword, Punctuation),
    
  1156. 				findBrackets(rakuPodFormatter),
    
  1157. 			},
    
  1158. 		},
    
  1159. 		"pod-formatter": {
    
  1160. 			// Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
    
  1161. 			{`>`, Punctuation, Pop(1)},
    
  1162. 			Include("pre-pod-formatter"),
    
  1163. 			// Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
    
  1164. 			{`.+?`, StringOther, nil},
    
  1165. 		},
    
  1166. 		"variable": {
    
  1167. 			{variablePattern, NameVariable, Push("name-adverb")},
    
  1168. 			{globalVariablePattern, NameVariableGlobal, Push("name-adverb")},
    
  1169. 			{`[$@]<[^>]+>`, NameVariable, nil},
    
  1170. 			{`\$[/!¢]`, NameVariable, nil},
    
  1171. 			{`[$@%]`, NameVariable, nil},
    
  1172. 		},
    
  1173. 		"single-quote": {
    
  1174. 			{`(?<!(?<!\\)\\)'`, Punctuation, Push("single-quote-inner")},
    
  1175. 		},
    
  1176. 		"single-quote-inner": {
    
  1177. 			{`(?<!(?<!(?<!\\)\\)\\)'`, Punctuation, Pop(1)},
    
  1178. 			Include("escape-single-quote"),
    
  1179. 			Include("escape-qq"),
    
  1180. 			{`(?:\\\\|\\[^\\]|[^'\\])+?`, StringSingle, nil},
    
  1181. 		},
    
  1182. 		"double-quotes": {
    
  1183. 			{`(?<!(?<!\\)\\)"`, Punctuation, Pop(1)},
    
  1184. 			Include("qq"),
    
  1185. 		},
    
  1186. 		"<<": {
    
  1187. 			{`>>(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
    
  1188. 			Include("ww"),
    
  1189. 		},
    
  1190. 		"«": {
    
  1191. 			{`»(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
    
  1192. 			Include("ww"),
    
  1193. 		},
    
  1194. 		"ww": {
    
  1195. 			Include("single-quote"),
    
  1196. 			Include("qq"),
    
  1197. 		},
    
  1198. 		"qq": {
    
  1199. 			Include("qq-variable"),
    
  1200. 			Include("closure"),
    
  1201. 			Include(`escape-char`),
    
  1202. 			Include("escape-hexadecimal"),
    
  1203. 			Include("escape-c-name"),
    
  1204. 			Include("escape-qq"),
    
  1205. 			{`.+?`, StringDouble, nil},
    
  1206. 		},
    
  1207. 		"qq-variable": {
    
  1208. 			{
    
  1209. 				`(?<!(?<!\\)\\)(?:` + variablePattern + `|` + globalVariablePattern + `)` + colonPairLookahead + `)`,
    
  1210. 				NameVariable,
    
  1211. 				Push("qq-variable-extras", "name-adverb"),
    
  1212. 			},
    
  1213. 		},
    
  1214. 		"qq-variable-extras": {
    
  1215. 			// Method
    
  1216. 			{
    
  1217. 				`(?<operator>\.)(?<method_name>` + namePattern + `)` + colonPairLookahead + `\()`,
    
  1218. 				ByGroupNames(map[string]Emitter{
    
  1219. 					`operator`:    Operator,
    
  1220. 					`method_name`: NameFunction,
    
  1221. 				}),
    
  1222. 				Push(`name-adverb`),
    
  1223. 			},
    
  1224. 			// Function/Signature
    
  1225. 			{
    
  1226. 				`\(`, Punctuation, replaceRule(
    
  1227. 					ruleReplacingConfig{
    
  1228. 						delimiter: []rune(`)`),
    
  1229. 						tokenType: Punctuation,
    
  1230. 						stateName: `root`,
    
  1231. 						pushState: true,
    
  1232. 					}),
    
  1233. 			},
    
  1234. 			Default(Pop(1)),
    
  1235. 		},
    
  1236. 		"Q": {
    
  1237. 			Include("escape-qq"),
    
  1238. 			{`.+?`, String, nil},
    
  1239. 		},
    
  1240. 		"Q-closure": {
    
  1241. 			Include("escape-qq"),
    
  1242. 			Include("closure"),
    
  1243. 			{`.+?`, String, nil},
    
  1244. 		},
    
  1245. 		"Q-variable": {
    
  1246. 			Include("escape-qq"),
    
  1247. 			Include("qq-variable"),
    
  1248. 			{`.+?`, String, nil},
    
  1249. 		},
    
  1250. 		"closure": {
    
  1251. 			{`(?<!(?<!\\)\\){`, Punctuation, replaceRule(
    
  1252. 				ruleReplacingConfig{
    
  1253. 					delimiter: []rune(`}`),
    
  1254. 					tokenType: Punctuation,
    
  1255. 					stateName: `root`,
    
  1256. 					pushState: true,
    
  1257. 				}),
    
  1258. 			},
    
  1259. 		},
    
  1260. 		"token": {
    
  1261. 			// Token signature
    
  1262. 			{`\(`, Punctuation, replaceRule(
    
  1263. 				ruleReplacingConfig{
    
  1264. 					delimiter: []rune(`)`),
    
  1265. 					tokenType: Punctuation,
    
  1266. 					stateName: `root`,
    
  1267. 					pushState: true,
    
  1268. 				}),
    
  1269. 			},
    
  1270. 			{`{`, Punctuation, replaceRule(
    
  1271. 				ruleReplacingConfig{
    
  1272. 					delimiter: []rune(`}`),
    
  1273. 					tokenType: Punctuation,
    
  1274. 					stateName: `regex`,
    
  1275. 					popState:  true,
    
  1276. 					pushState: true,
    
  1277. 				}),
    
  1278. 			},
    
  1279. 			{`\s*`, Text, nil},
    
  1280. 			Default(Pop(1)),
    
  1281. 		},
    
  1282. 	}
    
  1283. }
    
  1284. 
    
  1285. // Joins keys of rune map
    
  1286. func joinRuneMap(m map[rune]rune) string {
    
  1287. 	runes := make([]rune, 0, len(m))
    
  1288. 	for k := range m {
    
  1289. 		runes = append(runes, k)
    
  1290. 	}
    
  1291. 
    
  1292. 	return string(runes)
    
  1293. }
    
  1294. 
    
  1295. // Finds the index of substring in the string starting at position n
    
  1296. func indexAt(str []rune, substr []rune, pos int) int {
    
  1297. 	strFromPos := str[pos:]
    
  1298. 	text := string(strFromPos)
    
  1299. 
    
  1300. 	idx := strings.Index(text, string(substr))
    
  1301. 	if idx > -1 {
    
  1302. 		idx = utf8.RuneCountInString(text[:idx])
    
  1303. 
    
  1304. 		// Search again if the substr is escaped with backslash
    
  1305. 		if (idx > 1 && strFromPos[idx-1] == '\\' && strFromPos[idx-2] != '\\') ||
    
  1306. 			(idx == 1 && strFromPos[idx-1] == '\\') {
    
  1307. 			idx = indexAt(str[pos:], substr, idx+1)
    
  1308. 
    
  1309. 			idx = utf8.RuneCountInString(text[:idx])
    
  1310. 
    
  1311. 			if idx < 0 {
    
  1312. 				return idx
    
  1313. 			}
    
  1314. 		}
    
  1315. 		idx += pos
    
  1316. 	}
    
  1317. 
    
  1318. 	return idx
    
  1319. }
    
  1320. 
    
  1321. // Tells if an array of string contains a string
    
  1322. func contains(s []string, e string) bool {
    
  1323. 	for _, value := range s {
    
  1324. 		if value == e {
    
  1325. 			return true
    
  1326. 		}
    
  1327. 	}
    
  1328. 	return false
    
  1329. }
    
  1330. 
    
  1331. type rulePosition int
    
  1332. 
    
  1333. const (
    
  1334. 	topRule    rulePosition = 0
    
  1335. 	bottomRule              = -1
    
  1336. )
    
  1337. 
    
  1338. type ruleMakingConfig struct {
    
  1339. 	delimiter              []rune
    
  1340. 	pattern                string
    
  1341. 	tokenType              Emitter
    
  1342. 	mutator                Mutator
    
  1343. 	numberOfDelimiterChars int
    
  1344. }
    
  1345. 
    
  1346. type ruleReplacingConfig struct {
    
  1347. 	delimiter              []rune
    
  1348. 	pattern                string
    
  1349. 	tokenType              Emitter
    
  1350. 	numberOfDelimiterChars int
    
  1351. 	mutator                Mutator
    
  1352. 	appendMutator          Mutator
    
  1353. 	rulePosition           rulePosition
    
  1354. 	stateName              string
    
  1355. 	pop                    bool
    
  1356. 	popState               bool
    
  1357. 	pushState              bool
    
  1358. }
    
  1359. 
    
  1360. // Pops rule from state-stack and replaces the rule with the previous rule
    
  1361. func popRule(rule ruleReplacingConfig) MutatorFunc {
    
  1362. 	return func(state *LexerState) error {
    
  1363. 		stackName := genStackName(rule.stateName, rule.rulePosition)
    
  1364. 
    
  1365. 		stack, ok := state.Get(stackName).([]ruleReplacingConfig)
    
  1366. 
    
  1367. 		if ok && len(stack) > 0 {
    
  1368. 			// Pop from stack
    
  1369. 			stack = stack[:len(stack)-1]
    
  1370. 			lastRule := stack[len(stack)-1]
    
  1371. 			lastRule.pushState = false
    
  1372. 			lastRule.popState = false
    
  1373. 			lastRule.pop = true
    
  1374. 			state.Set(stackName, stack)
    
  1375. 
    
  1376. 			// Call replaceRule to use the last rule
    
  1377. 			err := replaceRule(lastRule)(state)
    
  1378. 			if err != nil {
    
  1379. 				panic(err)
    
  1380. 			}
    
  1381. 		}
    
  1382. 
    
  1383. 		return nil
    
  1384. 	}
    
  1385. }
    
  1386. 
    
  1387. // Replaces a state's rule based on the rule config and position
    
  1388. func replaceRule(rule ruleReplacingConfig) MutatorFunc {
    
  1389. 	return func(state *LexerState) error {
    
  1390. 		stateName := rule.stateName
    
  1391. 		stackName := genStackName(rule.stateName, rule.rulePosition)
    
  1392. 
    
  1393. 		stack, ok := state.Get(stackName).([]ruleReplacingConfig)
    
  1394. 		if !ok {
    
  1395. 			stack = []ruleReplacingConfig{}
    
  1396. 		}
    
  1397. 
    
  1398. 		// If state-stack is empty fill it with the placeholder rule
    
  1399. 		if len(stack) == 0 {
    
  1400. 			stack = []ruleReplacingConfig{
    
  1401. 				{
    
  1402. 					// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
    
  1403. 					pattern:      `\A\z`,
    
  1404. 					tokenType:    nil,
    
  1405. 					mutator:      nil,
    
  1406. 					stateName:    stateName,
    
  1407. 					rulePosition: rule.rulePosition,
    
  1408. 				},
    
  1409. 			}
    
  1410. 			state.Set(stackName, stack)
    
  1411. 		}
    
  1412. 
    
  1413. 		var mutator Mutator
    
  1414. 		mutators := []Mutator{}
    
  1415. 
    
  1416. 		switch {
    
  1417. 		case rule.rulePosition == topRule && rule.mutator == nil:
    
  1418. 			// Default mutator for top rule
    
  1419. 			mutators = []Mutator{Pop(1), popRule(rule)}
    
  1420. 		case rule.rulePosition == topRule && rule.mutator != nil:
    
  1421. 			// Default mutator for top rule, when rule.mutator is set
    
  1422. 			mutators = []Mutator{rule.mutator, popRule(rule)}
    
  1423. 		case rule.mutator != nil:
    
  1424. 			mutators = []Mutator{rule.mutator}
    
  1425. 		}
    
  1426. 
    
  1427. 		if rule.appendMutator != nil {
    
  1428. 			mutators = append(mutators, rule.appendMutator)
    
  1429. 		}
    
  1430. 
    
  1431. 		if len(mutators) > 0 {
    
  1432. 			mutator = Mutators(mutators...)
    
  1433. 		} else {
    
  1434. 			mutator = nil
    
  1435. 		}
    
  1436. 
    
  1437. 		ruleConfig := ruleMakingConfig{
    
  1438. 			pattern:                rule.pattern,
    
  1439. 			delimiter:              rule.delimiter,
    
  1440. 			numberOfDelimiterChars: rule.numberOfDelimiterChars,
    
  1441. 			tokenType:              rule.tokenType,
    
  1442. 			mutator:                mutator,
    
  1443. 		}
    
  1444. 
    
  1445. 		cRule := makeRule(ruleConfig)
    
  1446. 
    
  1447. 		switch rule.rulePosition {
    
  1448. 		case topRule:
    
  1449. 			state.Rules[stateName][0] = cRule
    
  1450. 		case bottomRule:
    
  1451. 			state.Rules[stateName][len(state.Rules[stateName])-1] = cRule
    
  1452. 		}
    
  1453. 
    
  1454. 		// Pop state name from stack if asked. State should be popped first before Pushing
    
  1455. 		if rule.popState {
    
  1456. 			err := Pop(1).Mutate(state)
    
  1457. 			if err != nil {
    
  1458. 				panic(err)
    
  1459. 			}
    
  1460. 		}
    
  1461. 
    
  1462. 		// Push state name to stack if asked
    
  1463. 		if rule.pushState {
    
  1464. 			err := Push(stateName).Mutate(state)
    
  1465. 			if err != nil {
    
  1466. 				panic(err)
    
  1467. 			}
    
  1468. 		}
    
  1469. 
    
  1470. 		if !rule.pop {
    
  1471. 			state.Set(stackName, append(stack, rule))
    
  1472. 		}
    
  1473. 
    
  1474. 		return nil
    
  1475. 	}
    
  1476. }
    
  1477. 
    
  1478. // Generates rule replacing stack using state name and rule position
    
  1479. func genStackName(stateName string, rulePosition rulePosition) (stackName string) {
    
  1480. 	switch rulePosition {
    
  1481. 	case topRule:
    
  1482. 		stackName = stateName + `-top-stack`
    
  1483. 	case bottomRule:
    
  1484. 		stackName = stateName + `-bottom-stack`
    
  1485. 	}
    
  1486. 	return
    
  1487. }
    
  1488. 
    
  1489. // Makes a compiled rule and returns it
    
  1490. func makeRule(config ruleMakingConfig) *CompiledRule {
    
  1491. 	var rePattern string
    
  1492. 
    
  1493. 	if len(config.delimiter) > 0 {
    
  1494. 		delimiter := string(config.delimiter)
    
  1495. 
    
  1496. 		if config.numberOfDelimiterChars > 1 {
    
  1497. 			delimiter = strings.Repeat(delimiter, config.numberOfDelimiterChars)
    
  1498. 		}
    
  1499. 
    
  1500. 		rePattern = `(?<!(?<!\\)\\)` + regexp2.Escape(delimiter)
    
  1501. 	} else {
    
  1502. 		rePattern = config.pattern
    
  1503. 	}
    
  1504. 
    
  1505. 	regex := regexp2.MustCompile(rePattern, regexp2.None)
    
  1506. 
    
  1507. 	cRule := &CompiledRule{
    
  1508. 		Rule:   Rule{rePattern, config.tokenType, config.mutator},
    
  1509. 		Regexp: regex,
    
  1510. 	}
    
  1511. 
    
  1512. 	return cRule
    
  1513. }
    
  1514. 
    
  1515. // Emitter for colon pairs, changes token state based on key and brackets
    
  1516. func colonPair(tokenClass TokenType) Emitter {
    
  1517. 	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
    
  1518. 		iterators := []Iterator{}
    
  1519. 		tokens := []Token{
    
  1520. 			{Punctuation, state.NamedGroups[`colon`]},
    
  1521. 			{Punctuation, state.NamedGroups[`opening_delimiters`]},
    
  1522. 			{Punctuation, state.NamedGroups[`closing_delimiters`]},
    
  1523. 		}
    
  1524. 
    
  1525. 		// Append colon
    
  1526. 		iterators = append(iterators, Literator(tokens[0]))
    
  1527. 
    
  1528. 		if tokenClass == NameAttribute {
    
  1529. 			iterators = append(iterators, Literator(Token{NameAttribute, state.NamedGroups[`key`]}))
    
  1530. 		} else {
    
  1531. 			var keyTokenState string
    
  1532. 			keyre := regexp.MustCompile(`^\d+$`)
    
  1533. 			if keyre.MatchString(state.NamedGroups[`key`]) {
    
  1534. 				keyTokenState = "common"
    
  1535. 			} else {
    
  1536. 				keyTokenState = "Q"
    
  1537. 			}
    
  1538. 
    
  1539. 			// Use token state to Tokenise key
    
  1540. 			if keyTokenState != "" {
    
  1541. 				iterator, err := state.Lexer.Tokenise(
    
  1542. 					&TokeniseOptions{
    
  1543. 						State:  keyTokenState,
    
  1544. 						Nested: true,
    
  1545. 					}, state.NamedGroups[`key`])
    
  1546. 
    
  1547. 				if err != nil {
    
  1548. 					panic(err)
    
  1549. 				} else {
    
  1550. 					// Append key
    
  1551. 					iterators = append(iterators, iterator)
    
  1552. 				}
    
  1553. 			}
    
  1554. 		}
    
  1555. 
    
  1556. 		// Append punctuation
    
  1557. 		iterators = append(iterators, Literator(tokens[1]))
    
  1558. 
    
  1559. 		var valueTokenState string
    
  1560. 
    
  1561. 		switch state.NamedGroups[`opening_delimiters`] {
    
  1562. 		case "(", "{", "[":
    
  1563. 			valueTokenState = "root"
    
  1564. 		case "<<", "«":
    
  1565. 			valueTokenState = "ww"
    
  1566. 		case "<":
    
  1567. 			valueTokenState = "Q"
    
  1568. 		}
    
  1569. 
    
  1570. 		// Use token state to Tokenise value
    
  1571. 		if valueTokenState != "" {
    
  1572. 			iterator, err := state.Lexer.Tokenise(
    
  1573. 				&TokeniseOptions{
    
  1574. 					State:  valueTokenState,
    
  1575. 					Nested: true,
    
  1576. 				}, state.NamedGroups[`value`])
    
  1577. 
    
  1578. 			if err != nil {
    
  1579. 				panic(err)
    
  1580. 			} else {
    
  1581. 				// Append value
    
  1582. 				iterators = append(iterators, iterator)
    
  1583. 			}
    
  1584. 		}
    
  1585. 		// Append last punctuation
    
  1586. 		iterators = append(iterators, Literator(tokens[2]))
    
  1587. 
    
  1588. 		return Concaterator(iterators...)
    
  1589. 	})
    
  1590. }
    
  1591. 
    
  1592. // Emitter for quoting constructs, changes token state based on quote name and adverbs
    
  1593. func quote(groups []string, state *LexerState) Iterator {
    
  1594. 	keyword := state.NamedGroups[`keyword`]
    
  1595. 	adverbsStr := state.NamedGroups[`adverbs`]
    
  1596. 	iterators := []Iterator{}
    
  1597. 	tokens := []Token{
    
  1598. 		{Keyword, keyword},
    
  1599. 		{StringAffix, adverbsStr},
    
  1600. 		{Text, state.NamedGroups[`ws`]},
    
  1601. 		{Punctuation, state.NamedGroups[`opening_delimiters`]},
    
  1602. 		{Punctuation, state.NamedGroups[`closing_delimiters`]},
    
  1603. 	}
    
  1604. 
    
  1605. 	// Append all tokens before dealing with the main string
    
  1606. 	iterators = append(iterators, Literator(tokens[:4]...))
    
  1607. 
    
  1608. 	var tokenStates []string
    
  1609. 
    
  1610. 	// Set tokenStates based on adverbs
    
  1611. 	adverbs := strings.Split(adverbsStr, ":")
    
  1612. 	for _, adverb := range adverbs {
    
  1613. 		switch adverb {
    
  1614. 		case "c", "closure":
    
  1615. 			tokenStates = append(tokenStates, "Q-closure")
    
  1616. 		case "qq":
    
  1617. 			tokenStates = append(tokenStates, "qq")
    
  1618. 		case "ww":
    
  1619. 			tokenStates = append(tokenStates, "ww")
    
  1620. 		case "s", "scalar", "a", "array", "h", "hash", "f", "function":
    
  1621. 			tokenStates = append(tokenStates, "Q-variable")
    
  1622. 		}
    
  1623. 	}
    
  1624. 
    
  1625. 	var tokenState string
    
  1626. 
    
  1627. 	switch {
    
  1628. 	case keyword == "qq" || contains(tokenStates, "qq"):
    
  1629. 		tokenState = "qq"
    
  1630. 	case adverbsStr == "ww" || contains(tokenStates, "ww"):
    
  1631. 		tokenState = "ww"
    
  1632. 	case contains(tokenStates, "Q-closure") && contains(tokenStates, "Q-variable"):
    
  1633. 		tokenState = "qq"
    
  1634. 	case contains(tokenStates, "Q-closure"):
    
  1635. 		tokenState = "Q-closure"
    
  1636. 	case contains(tokenStates, "Q-variable"):
    
  1637. 		tokenState = "Q-variable"
    
  1638. 	default:
    
  1639. 		tokenState = "Q"
    
  1640. 	}
    
  1641. 
    
  1642. 	iterator, err := state.Lexer.Tokenise(
    
  1643. 		&TokeniseOptions{
    
  1644. 			State:  tokenState,
    
  1645. 			Nested: true,
    
  1646. 		}, state.NamedGroups[`value`])
    
  1647. 
    
  1648. 	if err != nil {
    
  1649. 		panic(err)
    
  1650. 	} else {
    
  1651. 		iterators = append(iterators, iterator)
    
  1652. 	}
    
  1653. 
    
  1654. 	// Append the last punctuation
    
  1655. 	iterators = append(iterators, Literator(tokens[4]))
    
  1656. 
    
  1657. 	return Concaterator(iterators...)
    
  1658. }
    
  1659. 
    
  1660. // Emitter for pod config, tokenises the properties with "colon-pair-attribute" state
    
  1661. func podConfig(groups []string, state *LexerState) Iterator {
    
  1662. 	// Tokenise pod config
    
  1663. 	iterator, err := state.Lexer.Tokenise(
    
  1664. 		&TokeniseOptions{
    
  1665. 			State:  "colon-pair-attribute",
    
  1666. 			Nested: true,
    
  1667. 		}, groups[0])
    
  1668. 
    
  1669. 	if err != nil {
    
  1670. 		panic(err)
    
  1671. 	} else {
    
  1672. 		return iterator
    
  1673. 	}
    
  1674. }
    
  1675. 
    
  1676. // Emitter for pod code, tokenises the code based on the lang specified
    
  1677. func podCode(groups []string, state *LexerState) Iterator {
    
  1678. 	iterators := []Iterator{}
    
  1679. 	tokens := []Token{
    
  1680. 		{Comment, state.NamedGroups[`ws`]},
    
  1681. 		{Keyword, state.NamedGroups[`keyword`]},
    
  1682. 		{Keyword, state.NamedGroups[`ws2`]},
    
  1683. 		{Keyword, state.NamedGroups[`name`]},
    
  1684. 		{StringDoc, state.NamedGroups[`value`]},
    
  1685. 		{Comment, state.NamedGroups[`ws3`]},
    
  1686. 		{Keyword, state.NamedGroups[`end_keyword`]},
    
  1687. 		{Keyword, state.NamedGroups[`ws4`]},
    
  1688. 		{Keyword, state.NamedGroups[`name`]},
    
  1689. 	}
    
  1690. 
    
  1691. 	// Append all tokens before dealing with the pod config
    
  1692. 	iterators = append(iterators, Literator(tokens[:4]...))
    
  1693. 
    
  1694. 	// Tokenise pod config
    
  1695. 	iterators = append(iterators, podConfig([]string{state.NamedGroups[`config`]}, state))
    
  1696. 
    
  1697. 	langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(state.NamedGroups[`config`])
    
  1698. 	var lang string
    
  1699. 	if len(langMatch) > 1 {
    
  1700. 		lang = langMatch[1]
    
  1701. 	}
    
  1702. 
    
  1703. 	// Tokenise code based on lang property
    
  1704. 	sublexer := Get(lang)
    
  1705. 	if sublexer != nil {
    
  1706. 		iterator, err := sublexer.Tokenise(nil, state.NamedGroups[`value`])
    
  1707. 
    
  1708. 		if err != nil {
    
  1709. 			panic(err)
    
  1710. 		} else {
    
  1711. 			iterators = append(iterators, iterator)
    
  1712. 		}
    
  1713. 	} else {
    
  1714. 		iterators = append(iterators, Literator(tokens[4]))
    
  1715. 	}
    
  1716. 
    
  1717. 	// Append the rest of the tokens
    
  1718. 	iterators = append(iterators, Literator(tokens[5:]...))
    
  1719. 
    
  1720. 	return Concaterator(iterators...)
    
  1721. }