用 Scala 统计单词个数：一步一步详解
// Step-by-step word count over a small in-memory sample.
// Every intermediate result has its own name so the script also compiles
// outside the REPL (no value is defined twice).

// Sample input: each element is one line of text.
val lines = List("hello tom hello jerry", "hello jerry", "hello kitty")
// Step 1: split every line on single spaces, giving one Array[String] per line.
val lines_1 = lines.map(_.split(" "))
// Step 2: flatten the nested arrays into one flat list of words.
val lines_2 = lines_1.flatten
// flatMap performs steps 1 and 2 in a single call.
val words = lines.flatMap(_.split(" "))
// Step 3: pair every word with the count 1.
val wordToOne = words.map(x => (x, 1))
// The same mapping written with placeholder syntax.
val wordToOneShort = words.map((_, 1))
// Step 4: group the (word, 1) pairs by word.
val grouped = wordToOne.groupBy(_._1)
// Step 5: reduce each group to its size; every pair carries a 1, so size == sum.
val grouped_1 = grouped.map(t => (t._1, t._2.size))
// Step 6: convert the Map to a List so that it can be sorted.
val result = grouped_1.toList
// Step 7: sort ascending by count, then reverse to get the most frequent word first.
val result_1 = result.sortBy(_._2).reverse

// The whole pipeline as a single expression.
val wordCounts = lines.flatMap(_.split(" ")).map((_, 1)).groupBy(_._1).map(t => (t._1, t._2.size)).toList.sortBy(_._2).reverse
// Variant: mapValues touches only the values of the grouped map.
// NOTE: on Scala 2.13+ mapValues is deprecated in favour of .view.mapValues(...).
val wordCountsViaMapValues = lines.flatMap(_.split(" ")).map((_, 1)).groupBy(_._1).mapValues(_.size).toList.sortBy(_._2).reverse
// Variant: sum the 1s of each group explicitly with foldLeft instead of taking the size.
val wordCountsViaFold = lines.flatMap(_.split(" ")).map((_, 1)).groupBy(_._1).mapValues(_.foldLeft(0)(_ + _._2)).toList.sortBy(_._2).reverse
从本地读取文件
[scala] view plain copy
- package main.count
- import scala.io.Source
/** Reads a local text file and prints how often each word occurs, most frequent first. */
object WordCounts {

  /** File that is read when no path is passed on the command line. */
  private val DefaultPath = "d:\\words.txt" // backslash escaped: "\w" is not a valid escape sequence

  /**
   * Counts how often each whitespace-separated word occurs in `lines`.
   *
   * @param lines lines of text; blank lines and repeated separators contribute no words
   * @return (word, count) pairs ordered by descending count; the relative order of
   *         words with equal counts is unspecified
   */
  def countWords(lines: Iterable[String]): List[(String, Int)] =
    lines
      .flatMap(_.split("\\s+"))
      .filter(_.nonEmpty) // a leading separator or an empty line yields "" from split
      .groupBy(identity)
      .map { case (word, occurrences) => (word, occurrences.size) }
      .toList
      .sortBy(_._2)
      .reverse

  /**
   * Entry point: counts the words of a file and prints the resulting list.
   *
   * @param args optional; `args(0)` is the path of the file to read, otherwise
   *             the default path is used
   */
  def main(args: Array[String]): Unit = {
    val path = args.headOption.getOrElse(DefaultPath)
    val source = Source.fromFile(path)
    // Materialize the lines before closing: getLines() is a lazy iterator over the open file.
    val lines =
      try source.getLines().toList
      finally source.close()
    println(countWords(lines))
  }
}