import java.sql.DriverManager
import java.time.{LocalDateTime, ZoneOffset}
import org.apache.spark.rdd.JdbcRDD
import org.apache.spark.{SparkConf, SparkContext}
// spark-submit --master local[*] --jars /root/sparkjob/mysql-connector-java-5.1.38.jar --class com.zxb.sparkapplication.readwrite.SparkReadMysql /root/sparkjob/original-scalatest-1.0-SNAPSHOT.jar
/**
 * Reads rows from a MySQL `house` table into Spark via [[org.apache.spark.rdd.JdbcRDD]],
 * bounded by a `create_time` window, and prints them to stdout.
 */
object SparkReadMysql {

  /**
   * Converts a [[LocalDateTime]] to a 10-digit unix timestamp (seconds).
   *
   * MySQL's `from_unixtime()` expects seconds, while Java epoch millis are 13
   * digits, so we divide by 1000. UTC+8 is assumed to match the database
   * server's time zone — NOTE(review): confirm against the MySQL server config.
   */
  private def toEpochSeconds(dt: LocalDateTime): Long =
    dt.toInstant(ZoneOffset.ofHours(8)).toEpochMilli / 1000

  def main(args: Array[String]): Unit = {
    // Was "spark write mysql" — copy-paste error; this job READS from MySQL.
    val conf = new SparkConf().setMaster("local").setAppName("spark read mysql")
    val sc = new SparkContext(conf)
    try {
      // JDBC connection settings.
      // NOTE(review): credentials and host are hard-coded; move to args/config
      // (and avoid committing passwords) before production use.
      val driverClassName = "com.mysql.jdbc.Driver"
      val url = "jdbc:mysql://ip:3306/xunwu?characterEncoding=utf8&useSSL=false"
      val user = "root"
      val password = "123456"
      // create_time is a MySQL DATETIME; the two '?' placeholders receive
      // unix timestamps (seconds) converted server-side with from_unixtime().
      val sql = "select id,title,price,area from house where create_time > from_unixtime(?) and create_time < from_unixtime(?)"
      // Connection factory invoked on each executor; loading the driver class
      // registers it with DriverManager.
      val connection = () => {
        Class.forName(driverClassName)
        DriverManager.getConnection(url, user, password)
      }
      val startTime = LocalDateTime.of(2017, 1, 3, 0, 0, 0)
      val endTime = LocalDateTime.of(2019, 11, 4, 0, 0)
      val startTimeStamp = toEpochSeconds(startTime)
      val endTimeStamp = toEpochSeconds(endTime)
      println(s"startTime: $startTime, endTime: $endTime")
      println(s"startTime: $startTimeStamp, endTime: $endTimeStamp")
      // JdbcRDD splits [startTimeStamp, endTimeStamp] across numPartitions (=1 here),
      // binding the sub-range bounds to the two '?' placeholders per partition.
      val result: JdbcRDD[(Int, String, Int, Int)] = new JdbcRDD[(Int, String, Int, Int)](
        sc,
        connection,
        sql,
        startTimeStamp,
        endTimeStamp,
        1,
        rs => {
          val id = rs.getInt(1)
          val title = rs.getString(2)
          val price = rs.getInt(3)
          val area = rs.getInt(4)
          (id, title, price, area)
        }
      )
      result.collect().foreach(println)
    } finally {
      // Always release the SparkContext, even if the read or collect fails.
      sc.stop()
    }
  }
}