/ix ) { |item|
link = $2
counter = counter + 1
link_list += "[#{counter}] #{link}\n"
"[#{counter}] "
}
# remove empty lines
string.gsub!( /^\s*/m, '' )
# pre/code handling 1/2
string.gsub!( /(.+?)<\/pre>/m ) { |placeholder|
placeholder = placeholder.gsub(/\n/, "###BR###")
}
string.gsub!( /(.+?)<\/code>/m ) { |placeholder|
placeholder = placeholder.gsub(/\n/, "###BR###")
}
# remove all new lines
string.gsub!( /(\n\r|\r\r\n|\r\n|\n)/, '' )
# pre/code handling 2/2
string.gsub!( /###BR###/, "\n" )
# add counting
string.gsub!(/]*)>/i, "\n* ")
# add quoting
string.gsub!(/]*)>/i, '> ')
# add hr
string.gsub!(/
]*)>/i, "___\n")
# add new lines
string.gsub!( /\<(br|table)(|\/| [^>]*)\>/i, "\n" )
string.gsub!( /\<\/(div|p|pre|blockquote|table|tr)(|\s.+?)\>/i, "\n" )
string.gsub!( /\<\/td\>/i, ' ' )
# strip all other tags
string.gsub!( /\<.+?\>/, '' )
# strip all & < > "
string.gsub!( '&', '&' )
string.gsub!( '<', '<' )
string.gsub!( '>', '>' )
string.gsub!( '"', '"' )
string.gsub!( ' ', ' ' )
# decode decimal html entities like "&#8211;"
string.gsub!( /(&\#(\d+);?)/x ) { |item|
$2.chr
}
# decode hexadecimal html entities like "&#x3d;"
string.gsub!( /(&\#[xX]([0-9a-fA-F]+);?)/x ) { |item|
chr_orig = $1
hex = $2.hex
if hex
chr = hex.chr
if chr
chr_orig = chr
else
chr_orig
end
else
chr_orig
end
# check valid encoding
begin
if !chr_orig.encode('UTF-8').valid_encoding?
chr_orig = '?'
end
rescue
chr_orig = '?'
end
chr_orig
}
# remove trailing whitespace before line ends
string.gsub!(/\s+\n$/, "\n")
# collapse multiple empty lines
string.gsub!(/\n\n\n/, "\n\n")
# add extracted links
if link_list != ''
string += "\n\n" + link_list
end
string.strip
end
=begin
html = text_string.text2html
=end
# Convert the receiver (plain text) into an HTML fragment.
#
# The text is HTML-escaped with CGI.escapeHTML and every newline is then
# replaced by a <br> tag so line breaks survive HTML rendering.
#
# Returns a new String; the work is done on the escaped copy.
def text2html
  # escape first, so the <br> tags inserted below are not escaped themselves
  text = CGI.escapeHTML( self )
  # the previous replacement string was a bare newline, which made this
  # substitution a no-op; emit the line-break tag instead
  text = text.gsub( /\n/, '<br>' )
  # no "\n" is left at this point; chomp only drops a lone trailing "\r"
  text.chomp
end
end