// Lexer-only grammar XHTML2TXT.
// It declares two token rules: Normaliza, which rewrites selected XHTML
// fragments through setText(...), and Resto, which matches any other single
// character.
lexer grammar XHTML2TXT;
options {
    // Turns on ANTLR's lexer filter mode (see the ANTLR 3 documentation for
    // the exact rule-selection semantics of filtering lexers).
    filter=true;
}


// Normaliza: recognizes specific XHTML fragments and replaces the token text
// (via setText) with a plain-text equivalent.
//
// Groups of alternatives, in declaration order:
//   1. Runs of <p>/<b>/<i> open/close tags while getLine() < 11: removed.
//   2. Whole paragraphs wrapped only in <b>, <i>, <b><i> or <i><b>: tags are
//      stripped, the leading '\n' and trailing '\r' are kept.
//   3. Redundant or empty markup (</b><b>, </i><i>, <b></b>, <i></i>,
//      <u></u>, <p>, </p>): removed.
//   4. Superscripts and a few fixed phrases ("ex officio", "Art." variants):
//      replaced by literal text.
//   5. The fixed XML prolog/DOCTYPE, the <html>/<head> header and the
//      <body>/</body>/</html> tags: removed.
//
// NOTE(review): the original (Portuguese) comments in this rule had lost
// their accented letters, so non-ASCII characters appear to have been
// stripped from this file at some point. The replacement texts of the <sup>
// alternatives ("ns", "", "Art. 1", "Art. 2") may originally have contained
// characters such as the ordinal indicator or superscript digits. Confirm
// against an authoritative copy before relying on them.
Normaliza
	:
	  // Gated predicate: this alternative is only viable on lines 1..10.
	  {getLine() < 11}? =>
	      ('<b>'|'<i>'|'<p>'|'</p>'|'</b>'|'</i>')+
//	      {System.err.println("l="+getLine()+"t="+getText());}
	     {setText("");}

	// Paragraph entirely in bold: drop '\n<p><b>' (7 chars) and
	// '</b></p>\r' (9 chars), then restore the surrounding '\n' and '\r'.
	| '\n<p><b>' (options {greedy=false;} : ~('\r' | '<')+ ) '</b></p>\r'
	    {setText("\n"+getText().substring(7, getText().length() - 9)+"\r");}

	// Paragraph in bold+italic: 10-char prefix, 13-char suffix.
	| '\n<p><b><i>' (options {greedy=false;} : ~('\r' | '<')+ ) '</i></b></p>\r'
	    {setText("\n"+getText().substring(10, getText().length() - 13)+"\r");}

	// Paragraph in italic+bold: 10-char prefix, 13-char suffix.
	| '\n<p><i><b>' (options {greedy=false;} : ~('\r' | '<')+ ) '</b></i></p>\r'
	    {setText("\n"+getText().substring(10, getText().length() - 13)+"\r");}

	// Paragraph entirely in italic: 7-char prefix, 9-char suffix.
	| '\n<p><i>' (options {greedy=false;} : ~('\r' | '<')+ ) '</i></p>\r'
	    {setText("\n"+getText().substring(7, getText().length() - 9)+"\r");}

	// Close-then-reopen of the same inline tag is a no-op: remove it.
	| ('</b><b>')
	     {setText("");}
	| ('</i><i>')
	     {setText("");}

	// Empty bold paragraph, including its line delimiters: removed.
	| ('\n<p><b></b></p>\r')
	     {setText("");}

	// Remaining paragraph tags (and blanks directly before '</p>'): removed.
	| ('<p>')
	     {setText("");}
	| ('</p>')
	     {setText("");}
	| (' '+ '</p>')
	     {setText("");}

	// Empty inline elements: removed.
	// (A second, identical '<b></b>' alternative that used to follow the
	// bold-blanks alternative below was dropped: it could never be matched.)
	| ('<i></i>')
	     {setText("");}
	| ('<b></b>')
	     {setText("");}

	// Bold element holding 1 to 11 blanks collapses to a single blank.
	// The optional blanks are spelled out on purpose: according to the
	// original author, using ' '+ here caused an abnormal termination with
	// 'java.lang.ArrayIndexOutOfBoundsException: -1'.
	| ('<b>' ' ' ' '? ' '? ' '? ' '? ' '? ' '? ' '? ' '? ' '? ' '? '</b>')
	     {setText(" ");}

	// Superscripts (see the NOTE(review) in the rule header).
	| ('n<sup>os</sup>')
	     {setText("ns");}
	| ('<sup>o</sup>')
	     {setText("");}
	| ('<sup>2</sup>')
	     {setText("");}
	| ('<sup>3</sup>')
	     {setText("");}

	// Italic removed from fixed phrases and from a single blank.
	| ('<i>ex officio</i>')
	     {setText("ex officio");}
	| ('<i> </i>')
	     {setText(" ");}

	// Removes italic from list-item letters: keeps only the single
	// lowercase letter found at index 3, right after '<i>'.
	| ('<i>' ('a'..'z') '</i>')
	     {setText(getText().substring(3, 4));}

	// Bold removed from the "Art" / "Art." label variants.
	| ('<b>Art</b>')
	     {setText("Art");}
	| ('<b>' ' '+ 'Art.</b>')
	     {setText(" Art.");}
	| ('<b>Art.</b>')
	     {setText("Art.");}
	| ('<b>Art. </b>')
	     {setText("Art. ");}
	| ('<b> Art</b>')
	     {setText(" Art");}
	| ('<b>Art. 1<sup>o</sup></b>')
	     {setText("Art. 1");}
	| ('<b>Art. 2<sup>o</sup></b>')
	     {setText("Art. 2");}
	| ('<b> Art. </b>')
	     {setText(" Art. ");}

	// Empty underline element: removed.
	| ('<u></u>')
	     {setText("");}

	// Fixed XML declaration + DOCTYPE, matched literally: removed.
	| ('<?xml version="1.0" encoding="UTF-8"?>\r\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" \r\n "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" >\r\n')
	     {setText("");}

	// Fixed <html> opening tag and <head> section, matched literally: removed.
	| ( '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:lexml="http://www.lexml.gov.br/0.6">\r\n<head><title>Formato XHTML do LexML</title></head>\r\n')
	     {setText("");}

	// Document-level closing tags and the <body> opening line: removed.
	| ('</html>' | '</body>'| '<body>\r\n')
	     {setText("");}
	;

// Resto: matches exactly one character of any kind (wildcard), with no action,
// so the token text is the character itself.
Resto
	: .
	;

