// Top 50 client IPs by number of requests in the access log.
// The IP of a line is the first dotted-quad (e.g. 10.0.0.1) found anywhere in it;
// lines that contain no such match are ignored.
val data = sc.textFile("hdfs://toshiba:9000/user/xiaohei/hadoop/access.20120104.log")

// Compiled once here and captured by the closure below, rather than being
// rebuilt from the string literal for every single log line.
val ipPattern = """(\d+\.){3}\d+""".r

// Trailing-dot chaining keeps the expression pasteable line by line into the REPL.
// Steps: keep matched IPs -> (ip, 1) -> sum per ip -> swap to (count, ip) so the
// count is the sort key -> sort descending -> swap back to (ip, count) -> first 50.
val top50 = data.
  flatMap(line => ipPattern.findFirstIn(line)).
  map(ip => (ip, 1)).
  reduceByKey(_ + _).
  map { case (ip, hits) => (hits, ip) }.
  sortByKey(false).
  map { case (hits, ip) => (ip, hits) }.
  take(50)

// Prints one (ip, count) tuple per line, most frequent first.
top50.foreach(println)
// Using tuple positions (利用元组位置)
1
2
3
4
5
// Top 10 client IPs by number of requests in the access log.
// The IP of a line is taken to be its first space-separated field.
val data = sc.textFile("hdfs://toshiba:9000/user/xiaohei/hadoop/access.20120104.log")

// takeWhile reads only the prefix before the first space, so there is no array to
// index: a line consisting solely of spaces can no longer raise
// ArrayIndexOutOfBoundsException (String.split drops trailing empty strings and
// returns an empty array for such a line). Blank lines and lines starting with a
// space yield an empty field and are dropped instead of being counted under "".
// Steps: first field -> drop empties -> (ip, 1) -> sum per ip -> swap to
// (count, ip) so the count is the sort key -> sort descending -> swap back -> first 10.
val top10 = data.
  map(line => line.takeWhile(_ != ' ')).
  filter(_.nonEmpty).
  map(ip => (ip, 1)).
  reduceByKey(_ + _).
  map { case (ip, hits) => (hits, ip) }.
  sortByKey(false).
  map { case (hits, ip) => (ip, hits) }.
  take(10)