package chroma

import (
	"bytes"
)

type delegatingLexer struct {
	root     Lexer
	language Lexer
}

// DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
// inside HTML or PHP inside plain text.
//
// It takes two lexers as arguments: a root lexer and a language lexer. First, everything is scanned using the
// language lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root
// lexer. Finally, these two sets of tokens are merged.
//
// The lexers from the template lexer package use this base lexer.
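//
// A minimal usage sketch (hypothetical: the lexers.Get lookups come from the
// separate lexers package and are shown here only for illustration):
//
//	php := lexers.Get("php")   // language lexer; must emit Other for non-PHP text
//	html := lexers.Get("html") // root lexer for the surrounding document
//	embedded := DelegatingLexer(html, php)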
func DelegatingLexer(root Lexer, language Lexer) Lexer {
	return &delegatingLexer{
		root:     root,
		language: language,
	}
}

func (d *delegatingLexer) AnalyseText(text string) float32 {
	return d.root.AnalyseText(text)
}

func (d *delegatingLexer) SetAnalyser(analyser func(text string) float32) Lexer {
	d.root.SetAnalyser(analyser)
	return d
}

func (d *delegatingLexer) SetRegistry(r *LexerRegistry) Lexer {
	d.root.SetRegistry(r)
	d.language.SetRegistry(r)
	return d
}

func (d *delegatingLexer) Config() *Config {
	return d.language.Config()
}

// An insertion is the character range where language tokens should be inserted.
type insertion struct {
	start, end int
	tokens     []Token
}

func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) { // nolint: gocognit
	tokens, err := Tokenise(Coalesce(d.language), options, text)
	if err != nil {
		return nil, err
	}
	// Compute insertions and gather "Other" tokens.
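	// For example, lexing "<html><?php echo 1; ?></html>" with a PHP language
	// lexer leaves the HTML in the others buffer and records one insertion
	// spanning the "<?php echo 1; ?>" byte range, holding its PHP tokens.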
	others := &bytes.Buffer{}
	insertions := []*insertion{}
	var insert *insertion
	offset := 0
	var last Token
	for _, t := range tokens {
		if t.Type == Other {
			if last != EOF && insert != nil && last.Type != Other {
				insert.end = offset
			}
			others.WriteString(t.Value)
		} else {
			if last == EOF || last.Type == Other {
				insert = &insertion{start: offset}
				insertions = append(insertions, insert)
			}
			insert.tokens = append(insert.tokens, t)
		}
		last = t
		offset += len(t.Value)
	}

	if len(insertions) == 0 {
		return d.root.Tokenise(options, text)
	}

	// Lex the other tokens.
	rootTokens, err := Tokenise(Coalesce(d.root), options, others.String())
	if err != nil {
		return nil, err
	}

	// Interleave the two sets of tokens.
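	// Root tokens are copied through until they reach the next insertion's
	// start; a root token that straddles that boundary is split so the
	// language tokens are spliced back in at the exact byte range they
	// originally occupied.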
	var out []Token
	offset = 0 // Offset into text.
	tokenIndex := 0
	nextToken := func() Token {
		if tokenIndex >= len(rootTokens) {
			return EOF
		}
		t := rootTokens[tokenIndex]
		tokenIndex++
		return t
	}
	insertionIndex := 0
	nextInsertion := func() *insertion {
		if insertionIndex >= len(insertions) {
			return nil
		}
		i := insertions[insertionIndex]
		insertionIndex++
		return i
	}
	t := nextToken()
	i := nextInsertion()
	for t != EOF || i != nil {
		// fmt.Printf("%d->%d:%q   %d->%d:%q\n", offset, offset+len(t.Value), t.Value, i.start, i.end, Stringify(i.tokens...))
		if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
			var l Token
			l, t = splitToken(t, i.start-offset)
			if l != EOF {
				out = append(out, l)
				offset += len(l.Value)
			}
			out = append(out, i.tokens...)
			offset += i.end - i.start
			if t == EOF {
				t = nextToken()
			}
			i = nextInsertion()
		} else {
			out = append(out, t)
			offset += len(t.Value)
			t = nextToken()
		}
	}
	return Literator(out...), nil
}
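
// splitToken splits t at the given byte offset, returning the left and right
// halves. EOF marks an empty half: splitting "abcd" at offset 0 yields
// (EOF, "abcd"), at offset 4 yields ("abcd", EOF), and at offset 2 yields two
// clones of t holding "ab" and "cd".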
func splitToken(t Token, offset int) (l Token, r Token) {
	if t == EOF {
		return EOF, EOF
	}
	if offset == 0 {
		return EOF, t
	}
	if offset == len(t.Value) {
		return t, EOF
	}
	l = t.Clone()
	r = t.Clone()
	l.Value = l.Value[:offset]
	r.Value = r.Value[offset:]
	return
}