This article explains how to do Double Color Ball (shuangseqiu) lottery statistics with Hadoop. The walkthrough is fairly detailed and should be a useful reference; if you are interested, do read it to the end!
1/ Use Hadoop to count runs of adjacent red balls in Double Color Ball draws:
Test data is available at: http://pan.baidu.com/s/1hq82YrU
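Judging from the sample record quoted in the mapper below, each input line is expected to hold a draw number, the six red balls in ascending order, and the blue ball, separated by spaces or tabs, for example:

    2003001 10 11 12 13 26 28 11

For a draw like this the first job emits the run of adjacent red balls 10 11 12 13 once; the final output pairs each such run with the number of draws it occurred in (the actual counts of course depend on your data set).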
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Extracts runs of adjacent (consecutive) red-ball numbers from Double Color Ball draws.
 */
public class Adjacent extends Configured implements Tool {

    /**
     * Counter used to track malformed input data.
     */
    enum Counter {
        LINESKIP   // lines that could not be parsed
    }

    /**
     * Map task: for each draw, emit every run of consecutive red balls as the key, with value "1".
     */
    public static class AdjacentMap extends Mapper<LongWritable, Text, Text, Text> {

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // A source record looks like: 2003001 10 11 12 13 26 28 11
            // (draw number, six red balls in ascending order, one blue ball)
            String line = value.toString();
            line = line.replaceAll("\t", " ");   // normalize tabs to spaces

            try {
                String[] lineSplit = line.split(" ");
                if (lineSplit.length != 8) {
                    return;
                }

                // The last red ball never starts a new run, because there is nothing
                // after it to be adjacent to; the loop only compares each ball with the next one.
                String out = "";
                int next = -1;
                List<String> list = new ArrayList<String>();
                for (int i = 1; i < 7; i++) {
                    int a = Integer.parseInt(lineSplit[i]);
                    int b = 100;                    // sentinel: no ball follows the 6th red ball
                    if (i < 6) {
                        b = Integer.parseInt(lineSplit[i + 1]);
                    }
                    if (1 == b - a) {
                        if (next == a) {
                            out = out + " " + b;    // extend the current run
                            next = b;
                        } else {
                            out = a + " " + b;      // start a new run
                            next = b;
                        }
                    } else {
                        if (!out.equals("")) {
                            list.add(out);          // the run ended, remember it
                            out = "";
                        }
                    }
                }

                for (String s : list) {
                    context.write(new Text(s), new Text("1"));   // one record per run
                }
            } catch (java.lang.ArrayIndexOutOfBoundsException e) {
                context.getCounter(Counter.LINESKIP).increment(1);   // malformed line, counter +1
                return;
            }
        }
    }

    /**
     * Reduce task: count how often each run of adjacent red balls occurred.
     */
    public static class AdjacentReducer extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text key, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            int total = 0;
            for (Text text : value) {
                total++;
            }
            context.write(key, new Text("\t\t\t" + total));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();

        /** Job configuration — the part to pay attention to **/
        Job job = new Job(conf, "adjacent");                       // job name
        job.setJarByClass(Adjacent.class);                         // main class of the jar

        FileInputFormat.addInputPath(job, new Path(args[0]));      // input path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));    // output path

        job.setMapperClass(AdjacentMap.class);                     // use the Map class above
        job.setReducerClass(AdjacentReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);                         // output key type
        job.setOutputValueClass(Text.class);                       // output value type

        job.waitForCompletion(true);

        // Print a summary of the job
        System.out.println("Job name:     " + job.getJobName());
        System.out.println("Successful:   " + (job.isSuccessful() ? "yes" : "no"));
        System.out.println("Input rows:   " + job.getCounters().findCounter(
                "org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
        System.out.println("Output rows:  " + job.getCounters().findCounter(
                "org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS").getValue());
        System.out.println("Skipped rows: " + job.getCounters().findCounter(Counter.LINESKIP).getValue());

        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Prints the usage information and launches the MapReduce job.
     */
    public static void main(String[] args) throws Exception {
        // Check the argument count; if it is wrong, print the usage and exit
        if (args.length != 2) {
            System.err.println("");
            System.err.println("Usage: Adjacent <input path> <output path>");
            System.err.println("Example: hadoop jar ~/adjacent.jar ./input/ssq03-12.txt ./output/adjacent.txt");
            System.err.println("Counter:");
            System.err.println("\t" + "LINESKIP" + "\t" + "Lines which are too short");
            System.exit(-1);
        }

        // Record the start time
        DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        Date start = new Date();

        // Run the job
        int res = ToolRunner.run(new Configuration(), new Adjacent(), args);

        // Report the elapsed time
        Date end = new Date();
        float time = (float) ((end.getTime() - start.getTime()) / 60000.0);
        System.out.println("Job started:  " + formatter.format(start));
        System.out.println("Job finished: " + formatter.format(end));
        System.out.println("Elapsed:      " + String.valueOf(time) + " minutes");

        System.exit(res);
    }
}
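For a quick local sanity check of the run-extraction loop, the small standalone class below repeats the same logic on one hard-coded draw line. It is only a sketch for experimenting outside Hadoop; the class name AdjacentRunCheck and the embedded sample record are illustrative and not part of the job above.

import java.util.ArrayList;
import java.util.List;

public class AdjacentRunCheck {

    // Extract every run of consecutive red balls from one draw record,
    // mirroring the loop in AdjacentMap.
    static List<String> runs(String line) {
        String[] f = line.replaceAll("\t", " ").split(" ");
        List<String> list = new ArrayList<String>();
        String out = "";
        int next = -1;
        for (int i = 1; i < 7; i++) {                              // fields 1..6 are the red balls
            int a = Integer.parseInt(f[i]);
            int b = (i < 6) ? Integer.parseInt(f[i + 1]) : 100;    // 100 = sentinel after the last ball
            if (b - a == 1) {
                if (next == a) {
                    out = out + " " + b;                           // extend the current run
                } else {
                    out = a + " " + b;                             // start a new run
                }
                next = b;
            } else if (!out.equals("")) {
                list.add(out);                                     // run ended
                out = "";
            }
        }
        return list;
    }

    public static void main(String[] args) {
        // Sample record taken from the comment in the mapper above.
        for (String run : runs("2003001 10 11 12 13 26 28 11")) {
            System.out.println(run);                               // prints: 10 11 12 13
        }
    }
}

Run on that sample draw it prints the single run 10 11 12 13, which is exactly the key the mapper emits for that line.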
2/ Count how many times each red ball number has appeared:
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Counts how many times each red-ball number has been drawn.
 */
public class TotalHong extends Configured implements Tool {

    /**
     * Counter used to track malformed input data.
     */
    enum Counter {
        LINESKIP   // lines that could not be parsed
    }

    /**
     * Map task: emit each of the six red balls of a draw with value "1".
     */
    public static class AdjacentMap extends Mapper<LongWritable, Text, Text, Text> {

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // A source record looks like: 2003001 10 11 12 13 26 28 11
            String line = value.toString();
            line = line.replaceAll("\t", " ");   // normalize tabs to spaces

            try {
                String[] lineSplit = line.split(" ");
                if (lineSplit.length != 8) {
                    return;
                }
                for (int i = 1; i < 7; i++) {
                    context.write(new Text(lineSplit[i]), new Text("1"));   // one record per red ball
                }
            } catch (java.lang.ArrayIndexOutOfBoundsException e) {
                context.getCounter(Counter.LINESKIP).increment(1);   // malformed line, counter +1
                return;
            }
        }
    }

    /**
     * Reduce task: count the occurrences of each red-ball number.
     */
    public static class AdjacentReducer extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text key, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            int total = 0;
            for (Text text : value) {
                total++;
            }
            context.write(key, new Text(total + ""));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();

        /** Job configuration — the part to pay attention to **/
        Job job = new Job(conf, "totalhong");                      // job name
        job.setJarByClass(TotalHong.class);                        // main class of the jar

        FileInputFormat.addInputPath(job, new Path(args[0]));      // input path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));    // output path

        job.setMapperClass(AdjacentMap.class);                     // use the Map class above
        job.setReducerClass(AdjacentReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);                         // output key type
        job.setOutputValueClass(Text.class);                       // output value type

        job.waitForCompletion(true);

        // Print a summary of the job
        System.out.println("Job name:     " + job.getJobName());
        System.out.println("Successful:   " + (job.isSuccessful() ? "yes" : "no"));
        System.out.println("Input rows:   " + job.getCounters().findCounter(
                "org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
        System.out.println("Output rows:  " + job.getCounters().findCounter(
                "org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS").getValue());
        System.out.println("Skipped rows: " + job.getCounters().findCounter(Counter.LINESKIP).getValue());

        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Prints the usage information and launches the MapReduce job.
     */
    public static void main(String[] args) throws Exception {
        // Check the argument count; if it is wrong, print the usage and exit
        if (args.length != 2) {
            System.err.println("");
            System.err.println("Usage: TotalHong <input path> <output path>");
            System.err.println("Example: hadoop jar ~/totalhong.jar ./input/ssq03-12.txt ./output/totalhong.txt");
            System.err.println("Counter:");
            System.err.println("\t" + "LINESKIP" + "\t" + "Lines which are too short");
            System.exit(-1);
        }

        // Record the start time
        DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        Date start = new Date();

        // Run the job
        int res = ToolRunner.run(new Configuration(), new TotalHong(), args);

        // Report the elapsed time
        Date end = new Date();
        float time = (float) ((end.getTime() - start.getTime()) / 60000.0);
        System.out.println("Job started:  " + formatter.format(start));
        System.out.println("Job finished: " + formatter.format(end));
        System.out.println("Elapsed:      " + String.valueOf(time) + " minutes");

        System.exit(res);
    }
}
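A design note on this counting job, offered as a hedged sketch rather than a change to the code above: because the reducer counts incoming records instead of summing their values, it cannot also be registered as a combiner (a combiner would collapse many "1" records into one, and the record-counting reducer would then undercount). If the mapper were changed to emit new IntWritable(1) and the job called setMapOutputValueClass(IntWritable.class), a sum-based reducer such as the hypothetical SumReducer below could serve as both reducer and combiner via job.setCombinerClass(SumReducer.class):

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Illustrative sketch only: sums IntWritable values, so it stays correct
// whether it runs as the combiner, the reducer, or both.
public class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable v : values) {
            total += v.get();   // sum partial counts produced by maps or combiners
        }
        result.set(total);
        context.write(key, result);
    }
}

The job would also need setOutputValueClass(IntWritable.class) so the final output type matches; the trade-off is less shuffle traffic at the cost of slightly more setup code.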
That is everything in this article on how to implement Double Color Ball statistics with Hadoop. Thanks for reading! I hope the material shared here is helpful; for more related content, follow the 億速云 industry news channel!