<lexer>
  <!--
    Lexer definition for Pig scripts.
    <config> carries the lexer metadata; <rules> holds the named states.
    Within a state, rules are listed in priority order: more specific
    patterns come before the generic fallbacks at the end of "root".
  -->
  <config>
    <name>Pig</name>
    <alias>pig</alias>
    <filename>*.pig</filename>
    <mime_type>text/x-pig</mime_type>
    <!-- Patterns below are matched without regard to letter case. -->
    <case_insensitive>true</case_insensitive>
  </config>
  <rules>
    <state name="root">
      <!-- Whitespace runs. -->
      <rule pattern="\s+">
        <token type="Text"/>
      </rule>
      <!-- Line comment: double dash through end of line. -->
      <rule pattern="--.*">
        <token type="Comment"/>
      </rule>
      <!-- Block comment: /* ... */, non-greedy so it stops at the first closer. -->
      <rule pattern="/\*[\w\W]*?\*/">
        <token type="CommentMultiline"/>
      </rule>
      <!-- Backslash followed by a newline, then a lone backslash. -->
      <rule pattern="\\\n">
        <token type="Text"/>
      </rule>
      <rule pattern="\\">
        <token type="Text"/>
      </rule>
      <!--
        Single-quoted string on one line. Accepts the escapes \n \t \b \r \f,
        an escaped backslash or quote, and \u followed by four hex digits.
      -->
      <rule pattern="\&#39;(?:\\[ntbrf\\\&#39;]|\\u[0-9a-f]{4}|[^\&#39;\\\n\r])*\&#39;">
        <token type="LiteralString"/>
      </rule>
      <!-- Word and symbol classes, tried in this order before numeric literals. -->
      <rule>
        <include state="keywords"/>
      </rule>
      <rule>
        <include state="types"/>
      </rule>
      <rule>
        <include state="builtins"/>
      </rule>
      <rule>
        <include state="punct"/>
      </rule>
      <rule>
        <include state="operators"/>
      </rule>
      <!-- Numeric literals: float (optional exponent and f/d suffix), hex, integer (optional L suffix). -->
      <rule pattern="[0-9]*\.[0-9]+(e[0-9]+)?[fd]?">
        <token type="LiteralNumberFloat"/>
      </rule>
      <rule pattern="0x[0-9a-f]+">
        <token type="LiteralNumberHex"/>
      </rule>
      <rule pattern="[0-9]+L?">
        <token type="LiteralNumberInteger"/>
      </rule>
      <rule pattern="\n">
        <token type="Text"/>
      </rule>
      <!-- Identifier followed by optional whitespace and "(": emitted as name, whitespace, punctuation. -->
      <rule pattern="([a-z_]\w*)(\s*)(\()">
        <bygroups>
          <token type="NameFunction"/>
          <token type="Text"/>
          <token type="Punctuation"/>
        </bygroups>
      </rule>
      <!-- Fallbacks: anything not claimed above is emitted as plain text. -->
      <rule pattern="[()#:]">
        <token type="Text"/>
      </rule>
      <rule pattern="[^(:#\&#39;&#34;)\s]+">
        <token type="Text"/>
      </rule>
      <rule pattern="\S+\s+">
        <token type="Text"/>
      </rule>
    </state>
    <!--
      Reserved words and shell-style commands. The alternation must stay on
      one line: a line break inside an attribute value is turned into a space
      by the XML parser and would alter the regular expression.
      "exec" is listed next to "run"; both are Pig script-execution commands.
    -->
    <state name="keywords">
      <rule pattern="(assert|and|any|all|arrange|as|asc|bag|by|cache|CASE|cat|cd|cp|%declare|%default|define|dense|desc|describe|distinct|du|dump|eval|exec|explain|filter|flatten|foreach|full|generate|group|help|if|illustrate|import|inner|input|into|is|join|kill|left|limit|load|ls|map|matches|mkdir|mv|not|null|onschema|or|order|outer|output|parallel|pig|pwd|quit|register|returns|right|rm|rmf|rollup|run|sample|set|ship|split|stderr|stdin|stdout|store|stream|through|union|using|void)\b">
        <token type="Keyword"/>
      </rule>
    </state>
    <!-- Names highlighted as builtins. -->
    <state name="builtins">
      <rule pattern="(AVG|BinStorage|cogroup|CONCAT|copyFromLocal|copyToLocal|COUNT|cross|DIFF|MAX|MIN|PigDump|PigStorage|SIZE|SUM|TextLoader|TOKENIZE)\b">
        <token type="NameBuiltin"/>
      </rule>
    </state>
    <!-- Data type names. -->
    <state name="types">
      <rule pattern="(bytearray|BIGINTEGER|BIGDECIMAL|chararray|datetime|double|float|int|long|tuple)\b">
        <token type="KeywordType"/>
      </rule>
    </state>
    <!-- Statement terminator and bracket characters. -->
    <state name="punct">
      <rule pattern="[;(){}\[\]]">
        <token type="Punctuation"/>
      </rule>
    </state>
    <!-- Single-character operators, word-form comparisons, then symbolic comparisons. -->
    <state name="operators">
      <rule pattern="[#=,./%+\-?]">
        <token type="Operator"/>
      </rule>
      <rule pattern="(eq|gt|lt|gte|lte|neq|matches)\b">
        <token type="Operator"/>
      </rule>
      <rule pattern="(==|&lt;=|&lt;|&gt;=|&gt;|!=)">
        <token type="Operator"/>
      </rule>
    </state>
  </rules>
</lexer>