This article explains how to do Double Color Ball (shuangseqiu) lottery statistics with Hadoop. The walkthrough is fairly detailed and should be a useful reference; if you are interested, do read it to the end!
1/ Use Hadoop to count runs of adjacent red balls in Double Color Ball draws:
Test data is available at: http://pan.baidu.com/s/1hq82YrU
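Judging from the sample record quoted in the mapper below, each input line is expected to hold a draw number, the six red balls in ascending order, and the blue ball, separated by spaces or tabs, for example:

    2003001 10 11 12 13 26 28 11

For a draw like this the first job emits the run of adjacent red balls 10 11 12 13 once; the final output pairs each such run with the number of draws it occurred in (the actual counts of course depend on your data set).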
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Extracts runs of adjacent (consecutive) red-ball numbers from Double Color Ball draws.
 */
public class Adjacent extends Configured implements Tool {

    /**
     * Counter used to track malformed input data.
     */
    enum Counter {
        LINESKIP   // lines that could not be parsed
    }

    /**
     * Map task: for each draw, emit every run of consecutive red balls as the key, with value "1".
     */
    public static class AdjacentMap extends Mapper<LongWritable, Text, Text, Text> {

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // A source record looks like: 2003001 10 11 12 13 26 28 11
            // (draw number, six red balls in ascending order, one blue ball)
            String line = value.toString();
            line = line.replaceAll("\t", " ");   // normalize tabs to spaces

            try {
                String[] lineSplit = line.split(" ");
                if (lineSplit.length != 8) {
                    return;
                }

                // The last red ball never starts a new run, because there is nothing
                // after it to be adjacent to; the loop only compares each ball with the next one.
                String out = "";
                int next = -1;
                List<String> list = new ArrayList<String>();
                for (int i = 1; i < 7; i++) {
                    int a = Integer.parseInt(lineSplit[i]);
                    int b = 100;                    // sentinel: no ball follows the 6th red ball
                    if (i < 6) {
                        b = Integer.parseInt(lineSplit[i + 1]);
                    }
                    if (1 == b - a) {
                        if (next == a) {
                            out = out + " " + b;    // extend the current run
                            next = b;
                        } else {
                            out = a + " " + b;      // start a new run
                            next = b;
                        }
                    } else {
                        if (!out.equals("")) {
                            list.add(out);          // the run ended, remember it
                            out = "";
                        }
                    }
                }

                for (String s : list) {
                    context.write(new Text(s), new Text("1"));   // one record per run
                }
            } catch (java.lang.ArrayIndexOutOfBoundsException e) {
                context.getCounter(Counter.LINESKIP).increment(1);   // malformed line, counter +1
                return;
            }
        }
    }

    /**
     * Reduce task: count how often each run of adjacent red balls occurred.
     */
    public static class AdjacentReducer extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text key, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            int total = 0;
            for (Text text : value) {
                total++;
            }
            context.write(key, new Text("\t\t\t" + total));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();

        /** Job configuration — the part to pay attention to **/
        Job job = new Job(conf, "adjacent");                       // job name
        job.setJarByClass(Adjacent.class);                         // main class of the jar

        FileInputFormat.addInputPath(job, new Path(args[0]));      // input path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));    // output path

        job.setMapperClass(AdjacentMap.class);                     // use the Map class above
        job.setReducerClass(AdjacentReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);                         // output key type
        job.setOutputValueClass(Text.class);                       // output value type

        job.waitForCompletion(true);

        // Print a summary of the job
        System.out.println("Job name:     " + job.getJobName());
        System.out.println("Successful:   " + (job.isSuccessful() ? "yes" : "no"));
        System.out.println("Input rows:   " + job.getCounters().findCounter(
                "org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
        System.out.println("Output rows:  " + job.getCounters().findCounter(
                "org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS").getValue());
        System.out.println("Skipped rows: " + job.getCounters().findCounter(Counter.LINESKIP).getValue());

        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Prints the usage information and launches the MapReduce job.
     */
    public static void main(String[] args) throws Exception {
        // Check the argument count; if it is wrong, print the usage and exit
        if (args.length != 2) {
            System.err.println("");
            System.err.println("Usage: Adjacent <input path> <output path>");
            System.err.println("Example: hadoop jar ~/adjacent.jar ./input/ssq03-12.txt ./output/adjacent.txt");
            System.err.println("Counter:");
            System.err.println("\t" + "LINESKIP" + "\t" + "Lines which are too short");
            System.exit(-1);
        }

        // Record the start time
        DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        Date start = new Date();

        // Run the job
        int res = ToolRunner.run(new Configuration(), new Adjacent(), args);

        // Report the elapsed time
        Date end = new Date();
        float time = (float) ((end.getTime() - start.getTime()) / 60000.0);
        System.out.println("Job started:  " + formatter.format(start));
        System.out.println("Job finished: " + formatter.format(end));
        System.out.println("Elapsed:      " + String.valueOf(time) + " minutes");

        System.exit(res);
    }
}
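For a quick local sanity check of the run-extraction loop, the small standalone class below repeats the same logic on one hard-coded draw line. It is only a sketch for experimenting outside Hadoop; the class name AdjacentRunCheck and the embedded sample record are illustrative and not part of the job above.

import java.util.ArrayList;
import java.util.List;

public class AdjacentRunCheck {

    // Extract every run of consecutive red balls from one draw record,
    // mirroring the loop in AdjacentMap.
    static List<String> runs(String line) {
        String[] f = line.replaceAll("\t", " ").split(" ");
        List<String> list = new ArrayList<String>();
        String out = "";
        int next = -1;
        for (int i = 1; i < 7; i++) {                              // fields 1..6 are the red balls
            int a = Integer.parseInt(f[i]);
            int b = (i < 6) ? Integer.parseInt(f[i + 1]) : 100;    // 100 = sentinel after the last ball
            if (b - a == 1) {
                if (next == a) {
                    out = out + " " + b;                           // extend the current run
                } else {
                    out = a + " " + b;                             // start a new run
                }
                next = b;
            } else if (!out.equals("")) {
                list.add(out);                                     // run ended
                out = "";
            }
        }
        return list;
    }

    public static void main(String[] args) {
        // Sample record taken from the comment in the mapper above.
        for (String run : runs("2003001 10 11 12 13 26 28 11")) {
            System.out.println(run);                               // prints: 10 11 12 13
        }
    }
}

Run on that sample draw it prints the single run 10 11 12 13, which is exactly the key the mapper emits for that line.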
2/ Count how many times each red ball number has appeared:
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Counts how many times each red-ball number has been drawn.
 */
public class TotalHong extends Configured implements Tool {

    /**
     * Counter used to track malformed input data.
     */
    enum Counter {
        LINESKIP   // lines that could not be parsed
    }

    /**
     * Map task: emit each of the six red balls of a draw with value "1".
     */
    public static class AdjacentMap extends Mapper<LongWritable, Text, Text, Text> {

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // A source record looks like: 2003001 10 11 12 13 26 28 11
            String line = value.toString();
            line = line.replaceAll("\t", " ");   // normalize tabs to spaces

            try {
                String[] lineSplit = line.split(" ");
                if (lineSplit.length != 8) {
                    return;
                }
                for (int i = 1; i < 7; i++) {
                    context.write(new Text(lineSplit[i]), new Text("1"));   // one record per red ball
                }
            } catch (java.lang.ArrayIndexOutOfBoundsException e) {
                context.getCounter(Counter.LINESKIP).increment(1);   // malformed line, counter +1
                return;
            }
        }
    }

    /**
     * Reduce task: count the occurrences of each red-ball number.
     */
    public static class AdjacentReducer extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text key, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            int total = 0;
            for (Text text : value) {
                total++;
            }
            context.write(key, new Text(total + ""));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();

        /** Job configuration — the part to pay attention to **/
        Job job = new Job(conf, "totalhong");                      // job name
        job.setJarByClass(TotalHong.class);                        // main class of the jar

        FileInputFormat.addInputPath(job, new Path(args[0]));      // input path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));    // output path

        job.setMapperClass(AdjacentMap.class);                     // use the Map class above
        job.setReducerClass(AdjacentReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);                         // output key type
        job.setOutputValueClass(Text.class);                       // output value type

        job.waitForCompletion(true);

        // Print a summary of the job
        System.out.println("Job name:     " + job.getJobName());
        System.out.println("Successful:   " + (job.isSuccessful() ? "yes" : "no"));
        System.out.println("Input rows:   " + job.getCounters().findCounter(
                "org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
        System.out.println("Output rows:  " + job.getCounters().findCounter(
                "org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS").getValue());
        System.out.println("Skipped rows: " + job.getCounters().findCounter(Counter.LINESKIP).getValue());

        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Prints the usage information and launches the MapReduce job.
     */
    public static void main(String[] args) throws Exception {
        // Check the argument count; if it is wrong, print the usage and exit
        if (args.length != 2) {
            System.err.println("");
            System.err.println("Usage: TotalHong <input path> <output path>");
            System.err.println("Example: hadoop jar ~/totalhong.jar ./input/ssq03-12.txt ./output/totalhong.txt");
            System.err.println("Counter:");
            System.err.println("\t" + "LINESKIP" + "\t" + "Lines which are too short");
            System.exit(-1);
        }

        // Record the start time
        DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        Date start = new Date();

        // Run the job
        int res = ToolRunner.run(new Configuration(), new TotalHong(), args);

        // Report the elapsed time
        Date end = new Date();
        float time = (float) ((end.getTime() - start.getTime()) / 60000.0);
        System.out.println("Job started:  " + formatter.format(start));
        System.out.println("Job finished: " + formatter.format(end));
        System.out.println("Elapsed:      " + String.valueOf(time) + " minutes");

        System.exit(res);
    }
}
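A design note on this counting job, offered as a hedged sketch rather than a change to the code above: because the reducer counts incoming records instead of summing their values, it cannot also be registered as a combiner (a combiner would collapse many "1" records into one, and the record-counting reducer would then undercount). If the mapper were changed to emit new IntWritable(1) and the job called setMapOutputValueClass(IntWritable.class), a sum-based reducer such as the hypothetical SumReducer below could serve as both reducer and combiner via job.setCombinerClass(SumReducer.class):

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Illustrative sketch only: sums IntWritable values, so it stays correct
// whether it runs as the combiner, the reducer, or both.
public class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable v : values) {
            total += v.get();   // sum partial counts produced by maps or combiners
        }
        result.set(total);
        context.write(key, result);
    }
}

The job would also need setOutputValueClass(IntWritable.class) so the final output type matches; the trade-off is less shuffle traffic at the cost of slightly more setup code.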
That is everything in this article on how to implement Double Color Ball statistics with Hadoop. Thanks for reading! I hope the material shared here is helpful; for more related content, follow the 億速云 industry news channel!