<b id="vkpms"><nav id="vkpms"></nav></b>

<table id="vkpms"></table>

溫馨提示×

溫馨提示×

您好，登錄后才能下訂單哦！

密碼登錄×

忘記密碼？

登錄注冊×

獲取短信驗證碼

其他方式登錄

點擊登錄注冊 即表示同意《億速云用戶服務條款》

用戶登錄×

賬戶密碼登錄

請使用微信掃描上方二維碼

使用幫助

請求超時！

請點擊重新獲取二維碼

hadoop如何自定義格式化輸出

發布時間：2021-12-09 16:25:34 來源：億速云 閱讀：183 作者：小新 欄目：大數據

這篇文章給大家分享的是有關hadoop如何自定義格式化輸出的內容。小編覺得挺實用的，因此分享給大家做個參考，一起跟隨小編過來看看吧。

import java.io.IOException;
import java.net.URI;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class CustomizeOutputFormat {
	static final Log LOG = LogFactory.getLog(CustomizeOutputFormat.class);
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		
		job.setJarByClass(CustomizeOutputFormat.class);
		job.setMapperClass(CustMapper.class);
		
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		//此處這只自定義的格式化輸出
		job.setOutputFormatClass(CustOutputFormat.class);
		String jobName = "Customize outputformat test!";
		job.setJobName(jobName);
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		
		boolean b = job.waitForCompletion(true);
		if(b) {
			LOG.info("Job "+ jobName +" is done.");
			
		}else {
			LOG.info("Job "+ jobName +"is going wrong,now exit.");
			System.exit(0);
		}
		
	}
}
class CustMapper extends Mapper<LongWritable, Text, Text, Text>{
	String[] textIn = null;
	Text outkey = new Text();
	Text outvalue = new Text();
	@Override
	protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
			throws IOException, InterruptedException {
		/**
		 * 假設(shè)文件的內(nèi)容為如下：
		 * boys	girls
		 * firends goodbye
		 * down up
		 * fly to
		 * neibors that
		 * 
		 */
		textIn = value.toString().split("\t");
		outkey.set(textIn[0]);
		outvalue.set(textIn[1]);
		context.write(outkey, outvalue);		
	}	
}
// Custom OutputFormat
class CustOutputFormat extends FileOutputFormat<Text, Text>{
	@Override
	public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
		//獲得configration
		Configuration conf = context.getConfiguration();
		//獲得FileSystem
		FileSystem fs =  FileSystem.newInstance(conf);
		//獲得輸出路徑
		Path path = CustOutputFormat.getOutputPath(context);
		URI uri = path.toUri();
		//創(chuàng)建兩個(gè)文件，得到寫(xiě)入流
		FSDataOutputStream foa = fs.create(new Path(uri.toString()+"/out.a"));
		FSDataOutputStream fob = fs.create(new Path(uri.toString()+"/out.b"));	
		//創(chuàng)建自定義RecordWriter  傳入 兩個(gè)流
		CustRecordWriter rw = new CustRecordWriter(foa,fob);
		return rw;
		
	}
	
	
	class CustRecordWriter extends RecordWriter<Text, Text>{
		 FSDataOutputStream foa = null;
		 FSDataOutputStream fob = null;
		CustRecordWriter(FSDataOutputStream foa,FSDataOutputStream fob){
			this.foa = foa;
			this.fob = fob;
		}
		@Override
		public void write(Text key, Text value) throws IOException, InterruptedException {
			String mText  = key.toString();
			//根據(jù)可以長(zhǎng)度的不同分別輸入到不同的文件
			if(mText.length()>=5) {
				foa.writeUTF(mText+"\t"+value.toString()+"\n");
			}else {
				fob.writeUTF(mText+"\t"+value.toString()+"\n");
			}
		}
		@Override
		public void close(TaskAttemptContext context) throws IOException, InterruptedException {
			//最后將兩個(gè)寫(xiě)入流關(guān)閉
			if(foa!=null) {
				foa.close();
			}
			if(fob!=null) {
				fob.close();
			}
		}
	}
	
}
// Using MultipleInputs to process files from multiple input sources
package hgs.multipuleinput;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import hgs.custsort.SortBean;
import hgs.custsort.SortDriver;
import hgs.custsort.SortMapper;
import hgs.custsort.SortReducer;
public class MultipuleInputDriver {
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		
		job.setJarByClass(SortDriver.class);
		job.setMapperClass(SortMapper.class);
		job.setReducerClass(SortReducer.class);
		job.setOutputKeyClass(SortBean.class);
		job.setOutputValueClass(NullWritable.class);
		
		MultipleInputs.addInputPath(job, new Path("/sort"), TextInputFormat.class,SortMapper.class);
		MultipleInputs.addInputPath(job, new Path("/sort1"), TextInputFormat.class,SortMapper.class);
		//FileInputFormat.setInputPaths(job, new Path("/sort"));
		FileOutputFormat.setOutputPath(job, new Path("/sortresult"));
		System.exit(job.waitForCompletion(true)==true?0:1);
	}
}

感謝各位的閱讀！關于“hadoop如何自定義格式化輸出”這篇文章就分享到這里了，希望以上內容可以對大家有一定的幫助，讓大家可以學到更多知識，如果覺得文章不錯，可以把它分享出去讓更多的人看到吧！

向AI問一下細節

推薦閱讀：

免責聲明：本站發布的內容（圖片、視頻和文字）以原創、轉載和分享為主，文章觀點不代表本網站立場，如果涉及侵權請聯系站長郵箱：is@yisu.com進行舉報，并提供相關證據，一經查實，將立刻刪除涉嫌侵權內容。

上一篇新聞：
Hadoop集群怎樣安裝
下一篇新聞：
怎么用Elasticsearch打造知識庫檢索系統

猜你喜歡

AI
助
手

產品服務

地區劃分

專題活動

幫助支持

關于我們

售后咨詢

7*24小時在線電話：400-100-2938

7*24小時在線 QQ：800811969

關注億速云

億速云公眾號

手機網站二維碼

<table id="k3e5e"><nav id="k3e5e"></nav></table>