i have hbase table has unique rowkey , 1 column family , 1 column.. have tsv file has around 300+ columns. rowkey in file combined value of 2 columns. need compare rowkey table , file, if rowkey matches need insert table column value last column in tsv file respective rowkey. have written following code, executes else part.
package mapreduce; import java.io.bufferedreader; import java.io.bufferedwriter; import java.io.filereader; import java.io.filewriter; import java.util.arraylist; import java.util.iterator; import java.util.list; import org.apache.hadoop.conf.configuration; import org.apache.hadoop.hbase.hbaseconfiguration; import org.apache.hadoop.hbase.keyvalue; import org.apache.hadoop.hbase.client.htable; import org.apache.hadoop.hbase.client.result; import org.apache.hadoop.hbase.client.resultscanner; import org.apache.hadoop.hbase.client.scan; public class tsv_read{ private static configuration conf = null; static { conf = hbaseconfiguration.create(); } @suppresswarnings("resource") public static void main(string[] arg) throws exception { bufferedreader tsvfile = new bufferedreader(new filereader("path/to/file/.tsv")); string datarow = tsvfile.readline(); list<string> list = new arraylist<string>(); while (datarow != null){ list.clear(); string[] dataarray = datarow.split("\t"); (string item:dataarray) { htable table = new htable(conf, "table name"); //hbase table name scan s = new scan(); resultscanner ss = table.getscanner(s); for(result r:ss){ for(keyvalue kv : r.raw()){ //system.out.println("rowkey :" +dataarray[12]+"-"+dataarray[13]); //system.out.print(new string(kv.getrow()) + " "); if((dataarray[12]+"-"+dataarray[13]).equals(new string(kv.getrow()))){ //comparing rowkeys file , table (doesn't work) system.out.println("file rowkey :"+dataarray[12]+"-"+dataarray[13]); system.out.println("table row key"+new string(kv.getrow())); //dataarray[392]=new string(kv.getvalue()); filewriter fstream = new filewriter("/path/to/the/file/*.tsv",true); bufferedwriter fbw = new bufferedwriter(fstream); fbw.write(new string(kv.getvalue())); //inserting value tsv file fbw.newline(); fbw.close(); system.out.println("column value written succesfully"); } else //always executes part { system.out.println("rowkey not found :" +new string(kv.getrow())); } /*system.out.print(new string(kv.getfamily()) + ":"); system.out.print(new string(kv.getqualifier()) + " "); system.out.print(kv.gettimestamp() + " ");*/ //system.out.println(new string(kv.getvalue())); list.add(item); } } } iterator<string> = list.iterator(); while (it.hasnext()) { string txt = it.next(); system.out.print(txt+" "); } system.out.println(); // print data line. datarow = tsvfile.readline(); } tsvfile.close(); system.out.println(); } //main() }
sample record :
dataarray[12]+"-"+dataarray[13] = 3049620139673452544-5172983457411783096
in hbase table, rowkey has values in same format.
i can't share whole record has 300+ columns.
tsv file size: around 10gb
hbase table : around 10254950 rows.
any appreciated. in advance.
instead of writing this
if((dataarray[12]+"-"+dataarray[13]).equals(new string(kv.getrow()))){ //comparing rowkeys file , table (doesn't work)
try this
if((dataarray[12]+"-"+dataarray[13]).equals(bytes.tostring(kv.getrow()))){
you have not row value correctly.
try updated code, uses instead of scan hbase , takes less time run
while (datarow != null) { list.clear(); string[] dataarray = datarow.split("\t"); (string item : dataarray) { string key = dataarray[12] + "-" + dataarray[13]; htable table = new htable(conf, "table name"); // hbase table // name get = new get(bytes.tobytes(key)); result r = table.get(get); if (r != null && r.size() > 0) { (keyvalue kv : r.raw()) { system.out.println("file rowkey :" + key); system.out.println("table row key" + bytes.tostring(kv.getrow())); filewriter fstream = new filewriter( "/path/to/the/file/*.tsv", true); bufferedwriter fbw = new bufferedwriter(fstream); fbw.write(new string(kv.getvalue())); // inserting // value // tsv file fbw.newline(); fbw.close(); system.out.println("column value written succesfully"); } } else { system.out.println("rowkey not found :" + key); } list.add(item); } }
Comments
Post a Comment