您好,登錄后才能下訂單哦!
小編給大家分享一下spark與hbase怎么用,希望大家閱讀完這篇文章之后都有所收獲,下面讓我們一起去探討吧!
package hgs.spark.hbase import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.spark.rdd.NewHadoopRDD import org.apache.hadoop.hbase.mapreduce.TableInputFormat object HbaseTest { def main(args: Array[String]): Unit = { val conf = new SparkConf conf.setMaster("local").setAppName("local") val context = new SparkContext(conf) val hadoopconf = new HBaseConfiguration hadoopconf.set("hbase.zookeeper.quorum", "bigdata01:2181,bigdata02:2181,bigdata03:2181") hadoopconf.set("hbase.zookeeper.property.clientPort", "2181") val tableName = "test1" hadoopconf.set(TableInputFormat.INPUT_TABLE, tableName) hadoopconf.set(TableInputFormat.SCAN_ROW_START, "h") hadoopconf.set(TableInputFormat.SCAN_ROW_STOP, "x") hadoopconf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "cf1") hadoopconf.set(TableInputFormat.SCAN_COLUMNS, "cf1:col1,cf1:col2") /*val startrow = "h" val stoprow = "w" val scan = new Scan scan.setStartRow(startrow.getBytes) scan.setStartRow(stoprow.getBytes) val proto = ProtobufUtil.toScan(scan) val scanToString = Base64.encodeBytes(proto.toByteArray()) println(scanToString) hadoopconf.set(TableInputFormat.SCAN, scanToString) */ val hbaseRdd = context.newAPIHadoopRDD(hadoopconf, classOf[TableInputFormat], classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], classOf[org.apache.hadoop.hbase.client.Result]) hbaseRdd.foreach(x=>{ val vale = x._2.getValue("cf1".getBytes, "col1".getBytes) val val2 = x._2.getValue("cf1".getBytes, "col2".getBytes) println(new String(vale),new String(val2)) }) context.stop() } }
package hgs.spark.hbase import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.mapred.TableOutputFormat import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.hbase.io.ImmutableBytesWritable object SparkToHbase { def main(args: Array[String]): Unit = { val conf = new SparkConf conf.setMaster("local").setAppName("local") val context = new SparkContext(conf) val rdd = context.parallelize(List(("aaaaaaa","aaaaaaa"),("bbbbb","bbbbb")), 2) val hadoopconf = new HBaseConfiguration hadoopconf.set("hbase.zookeeper.quorum", "bigdata01:2181,bigdata02:2181,bigdata03:2181") hadoopconf.set("hbase.zookeeper.property.clientPort", "2181") hadoopconf.set(TableOutputFormat.OUTPUT_TABLE, "test1") //hadoopconf.set(TableOutputFormat., "test1") val jobconf = new JobConf(hadoopconf,this.getClass) jobconf.set(TableOutputFormat.OUTPUT_TABLE, "test1") jobconf.setOutputFormat(classOf[TableOutputFormat]) val exterrdd = rdd.map(x=>{ val put = new Put(x._1.getBytes) put.add("cf1".getBytes, "col1".getBytes, x._2.getBytes) (new ImmutableBytesWritable,put) }) exterrdd.saveAsHadoopDataset(jobconf) context.stop() } }
看完了這篇文章,相信你對(duì)“spark與hbase怎么用”有了一定的了解,如果想了解更多相關(guān)知識(shí),歡迎關(guān)注億速云行業(yè)資訊頻道,感謝各位的閱讀!
免責(zé)聲明:本站發(fā)布的內(nèi)容(圖片、視頻和文字)以原創(chuàng)、轉(zhuǎn)載和分享為主,文章觀點(diǎn)不代表本網(wǎng)站立場(chǎng),如果涉及侵權(quán)請(qǐng)聯(lián)系站長(zhǎng)郵箱:is@yisu.com進(jìn)行舉報(bào),并提供相關(guān)證據(jù),一經(jīng)查實(shí),將立刻刪除涉嫌侵權(quán)內(nèi)容。