Ruby1.9读取txt乱码问题

出现以下错误:

in `split': invalid byte sequence in GBK (ArgumentError)

解决方案:

require "iconv"

# Encoding helpers added to String for reading GBK / UTF-8 text files.
#
# Implemented with String#encode (core since Ruby 1.9), so every method
# returns a String. The earlier Iconv.iconv(...).to_s form returned the
# inspect text of a one-element Array on Ruby 1.9+ (Array#to_s is inspect
# there), and Iconv itself left the standard library in Ruby 2.0.
class String
  # Treats the receiver's bytes as UTF-8 and transcodes them to GBK.
  # Invalid UTF-8 bytes and characters that have no GBK mapping are dropped.
  #
  # @return [String] a new GBK-encoded string
  def to_gbk
    encode("GBK", "UTF-8", :invalid => :replace, :undef => :replace, :replace => "")
  end

  # Treats the receiver's bytes as GBK and transcodes them to UTF-8.
  # Bytes that cannot be converted are dropped.
  #
  # @return [String] a new UTF-8-encoded string
  def to_utf8
    encode("UTF-8", "GBK", :invalid => :replace, :undef => :replace, :replace => "")
  end

  # Returns the receiver unchanged when it is valid in its current encoding;
  # otherwise returns a UTF-8 copy with every invalid UTF-8 byte sequence
  # removed, so that methods such as split no longer raise ArgumentError.
  #
  # @return [String] self, or a cleaned UTF-8 copy
  def to_utf8_valid
    return self if valid_encoding?

    # Round-trip through UTF-16BE: transcoding between two different
    # encodings guarantees the invalid-byte replacement is really applied,
    # which a same-encoding UTF-8 -> UTF-8 encode does not on every Ruby.
    encode("UTF-16BE", "UTF-8", :invalid => :replace, :undef => :replace, :replace => "")
      .encode("UTF-8")
  end
end
# Scan the tab-separated file boss2.txt and report every line whose field
# count differs from the expected 41, then print how many such lines exist.
puts "读取文件"
expected_fields = 41
total = 0
# File.foreach closes the file handle once iteration ends (or raises),
# unlike a bare File.open whose handle was never closed.
File.foreach("boss2.txt") do |line|
  # Strip invalid byte sequences first so split cannot raise ArgumentError.
  line_arr = line.to_utf8_valid.split(/\t/)
  len = line_arr.length
  next if len == expected_fields

  puts "line: #{line_arr[0]} #{len}"
  total += 1
end
puts "total:#{total}"

相关推荐