1. package chroma
    
  2. 
    
  3. import (
    
  4. 	"fmt"
    
  5. )
    
  6. 
    
// An Emitter takes group matches and returns tokens.
type Emitter interface {
	// Emit tokens for the given regex groups.
	//
	// By convention groups[0] is the full match and groups[1:] are the
	// capture groups (see byGroupsEmitter.Emit, which iterates groups[1:]).
	Emit(groups []string, state *LexerState) Iterator
}
    
  12. 
    
// SerialisableEmitter is an Emitter that can be serialised and deserialised to/from JSON.
type SerialisableEmitter interface {
	Emitter
	// EmitterKind returns a unique identifier for this emitter type,
	// used to select the concrete type during deserialisation.
	EmitterKind() string
}
    
  18. 
    
// EmitterFunc is an adapter that allows an ordinary function to be used as
// an Emitter.
type EmitterFunc func(groups []string, state *LexerState) Iterator
    
  21. 
    
// Emit tokens for groups by calling the wrapped function.
func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
	return e(groups, state)
}
    
  26. 
    
  27. type Emitters []Emitter
    
  28. 
    
// byGroupsEmitter emits one token stream per capture group, using the
// positionally corresponding Emitter for each group.
type byGroupsEmitter struct {
	Emitters
}
    
  32. 
    
  33. // ByGroups emits a token for each matching group in the rule's regex.
    
  34. func ByGroups(emitters ...Emitter) Emitter {
    
  35. 	return &byGroupsEmitter{Emitters: emitters}
    
  36. }
    
  37. 
    
  38. func (b *byGroupsEmitter) EmitterKind() string { return "bygroups" }
    
  39. 
    
  40. func (b *byGroupsEmitter) Emit(groups []string, state *LexerState) Iterator {
    
  41. 	iterators := make([]Iterator, 0, len(groups)-1)
    
  42. 	if len(b.Emitters) != len(groups)-1 {
    
  43. 		iterators = append(iterators, Error.Emit(groups, state))
    
  44. 		// panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
    
  45. 	} else {
    
  46. 		for i, group := range groups[1:] {
    
  47. 			if b.Emitters[i] != nil {
    
  48. 				iterators = append(iterators, b.Emitters[i].Emit([]string{group}, state))
    
  49. 			}
    
  50. 		}
    
  51. 	}
    
  52. 	return Concaterator(iterators...)
    
  53. }
    
  54. 
    
  55. // ByGroupNames emits a token for each named matching group in the rule's regex.
    
  56. func ByGroupNames(emitters map[string]Emitter) Emitter {
    
  57. 	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
    
  58. 		iterators := make([]Iterator, 0, len(state.NamedGroups)-1)
    
  59. 		if len(state.NamedGroups)-1 == 0 {
    
  60. 			if emitter, ok := emitters[`0`]; ok {
    
  61. 				iterators = append(iterators, emitter.Emit(groups, state))
    
  62. 			} else {
    
  63. 				iterators = append(iterators, Error.Emit(groups, state))
    
  64. 			}
    
  65. 		} else {
    
  66. 			ruleRegex := state.Rules[state.State][state.Rule].Regexp
    
  67. 			for i := 1; i < len(state.NamedGroups); i++ {
    
  68. 				groupName := ruleRegex.GroupNameFromNumber(i)
    
  69. 				group := state.NamedGroups[groupName]
    
  70. 				if emitter, ok := emitters[groupName]; ok {
    
  71. 					if emitter != nil {
    
  72. 						iterators = append(iterators, emitter.Emit([]string{group}, state))
    
  73. 					}
    
  74. 				} else {
    
  75. 					iterators = append(iterators, Error.Emit([]string{group}, state))
    
  76. 				}
    
  77. 			}
    
  78. 		}
    
  79. 		return Concaterator(iterators...)
    
  80. 	})
    
  81. }
    
  82. 
    
// UsingByGroup emits tokens for the matched groups in the regex using a
// sublexer. Used when lexing code blocks where the name of a sublexer is
// contained within the block, for example on a Markdown text block or SQL
// language block.
//
// An attempt to load the sublexer will be made using the captured value from
// the text of the matched sublexerNameGroup. If a sublexer matching the
// sublexerNameGroup is available, then tokens for the matched codeGroup will
// be emitted using the sublexer. Otherwise, if no sublexer is available, then
// tokens will be emitted from the passed emitter.
//
// Example:
//
//	var Markdown = internal.Register(MustNewLexer(
//		&Config{
//			Name:      "markdown",
//			Aliases:   []string{"md", "mkd"},
//			Filenames: []string{"*.md", "*.mkd", "*.markdown"},
//			MimeTypes: []string{"text/x-markdown"},
//		},
//		Rules{
//			"root": {
//				{"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)",
//					UsingByGroup(
//						2, 4,
//						String, String, String, Text, String,
//					),
//					nil,
//				},
//			},
//		},
//	))
//
// See the lexers/markdown.go for the complete example.
//
// Note: panics if the number of emitters does not equal the number of matched
// groups in the regex.
func UsingByGroup(sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
	return &usingByGroup{
		SublexerNameGroup: sublexerNameGroup,
		CodeGroup:         codeGroup,
		Emitters:          emitters,
	}
}
    
  127. 
    
// usingByGroup is the serialisable emitter behind UsingByGroup.
type usingByGroup struct {
	SublexerNameGroup int      `xml:"sublexer_name_group"` // 1-based group index whose text names the sublexer
	CodeGroup         int      `xml:"code_group"`          // 1-based group index containing the code to sub-lex
	Emitters          Emitters `xml:"emitters"`            // positional emitters, one per capture group
}
    
  133. 
    
  134. func (u *usingByGroup) EmitterKind() string { return "usingbygroup" }
    
  135. func (u *usingByGroup) Emit(groups []string, state *LexerState) Iterator {
    
  136. 	// bounds check
    
  137. 	if len(u.Emitters) != len(groups)-1 {
    
  138. 		panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
    
  139. 	}
    
  140. 
    
  141. 	// grab sublexer
    
  142. 	sublexer := state.Registry.Get(groups[u.SublexerNameGroup])
    
  143. 
    
  144. 	// build iterators
    
  145. 	iterators := make([]Iterator, len(groups)-1)
    
  146. 	for i, group := range groups[1:] {
    
  147. 		if i == u.CodeGroup-1 && sublexer != nil {
    
  148. 			var err error
    
  149. 			iterators[i], err = sublexer.Tokenise(nil, groups[u.CodeGroup])
    
  150. 			if err != nil {
    
  151. 				panic(err)
    
  152. 			}
    
  153. 		} else if u.Emitters[i] != nil {
    
  154. 			iterators[i] = u.Emitters[i].Emit([]string{group}, state)
    
  155. 		}
    
  156. 	}
    
  157. 	return Concaterator(iterators...)
    
  158. }
    
  159. 
    
  160. // UsingLexer returns an Emitter that uses a given Lexer for parsing and emitting.
    
  161. //
    
  162. // This Emitter is not serialisable.
    
  163. func UsingLexer(lexer Lexer) Emitter {
    
  164. 	return EmitterFunc(func(groups []string, _ *LexerState) Iterator {
    
  165. 		it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
    
  166. 		if err != nil {
    
  167. 			panic(err)
    
  168. 		}
    
  169. 		return it
    
  170. 	})
    
  171. }
    
  172. 
    
// usingEmitter delegates tokenising to a lexer looked up by name in the
// LexerRegistry at emit time.
type usingEmitter struct {
	Lexer string `xml:"lexer,attr"` // name of the referenced lexer
}
    
  176. 
    
  177. func (u *usingEmitter) EmitterKind() string { return "using" }
    
  178. 
    
  179. func (u *usingEmitter) Emit(groups []string, state *LexerState) Iterator {
    
  180. 	if state.Registry == nil {
    
  181. 		panic(fmt.Sprintf("no LexerRegistry available for Using(%q)", u.Lexer))
    
  182. 	}
    
  183. 	lexer := state.Registry.Get(u.Lexer)
    
  184. 	if lexer == nil {
    
  185. 		panic(fmt.Sprintf("no such lexer %q", u.Lexer))
    
  186. 	}
    
  187. 	it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
    
  188. 	if err != nil {
    
  189. 		panic(err)
    
  190. 	}
    
  191. 	return it
    
  192. }
    
  193. 
    
// Using returns an Emitter that uses a given Lexer reference for parsing and emitting.
//
// The referenced lexer must be stored in the same LexerRegistry.
func Using(lexer string) Emitter {
	return &usingEmitter{Lexer: lexer}
}
    
  200. 
    
// usingSelfEmitter re-enters the current lexer at a given state.
type usingSelfEmitter struct {
	State string `xml:"state,attr"` // lexer state to start tokenising from
}
    
  204. 
    
  205. func (u *usingSelfEmitter) EmitterKind() string { return "usingself" }
    
  206. 
    
  207. func (u *usingSelfEmitter) Emit(groups []string, state *LexerState) Iterator {
    
  208. 	it, err := state.Lexer.Tokenise(&TokeniseOptions{State: u.State, Nested: true}, groups[0])
    
  209. 	if err != nil {
    
  210. 		panic(err)
    
  211. 	}
    
  212. 	return it
    
  213. }
    
  214. 
    
  215. // UsingSelf is like Using, but uses the current Lexer.
    
  216. func UsingSelf(stateName string) Emitter {
    
  217. 	return &usingSelfEmitter{stateName}
    
  218. }