hadoop - Comparing values from a TSV file and Hbase table in java -


I have an HBase table that has a unique rowkey, 1 column family, and 1 column. I also have a TSV file that has around 300+ columns. The rowkey in the file is the combined value of 2 columns. I need to compare the rowkey from the table with the one from the file, and if the rowkeys match I need to insert the table's column value as the last column in the TSV file for the respective rowkey. I have written the following code, but it always executes the else part.

package mapreduce;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;

/**
 * Reads a TSV file line by line, builds a row key from columns 12 and 13
 * (joined with "-"), and scans an HBase table comparing every cell's row key
 * against it. When a key matches, the cell value is appended to a TSV file.
 */
public class tsv_read {

    private static Configuration conf = null;

    static {
        conf = HBaseConfiguration.create();
    }

    /**
     * Entry point: walks the TSV file and runs one table scan per line.
     *
     * @param arg unused
     * @throws Exception if reading the file or talking to HBase fails
     */
    public static void main(String[] arg) throws Exception {

        BufferedReader tsvfile =
                new BufferedReader(new FileReader("path/to/file/.tsv"));
        try {
            // Open the table once; creating it per column per line leaked a
            // connection handle on every iteration.
            HTable table = new HTable(conf, "table name"); //hbase table name
            try {
                List<String> list = new ArrayList<String>();
                String datarow = tsvfile.readLine();

                while (datarow != null) {
                    list.clear();
                    String[] dataarray = datarow.split("\t");

                    // Collect each column exactly once for the echo below.
                    for (String item : dataarray) {
                        list.add(item);
                    }

                    if (dataarray.length > 13) {
                        // The key does not depend on the column being visited,
                        // so one scan per line is enough.
                        scanForRowKey(table, dataarray[12] + "-" + dataarray[13]);
                    } else {
                        // Short or blank line: indexes 12 and 13 do not exist.
                        System.err.println("skipping line with "
                                + dataarray.length + " columns (need at least 14)");
                    }

                    Iterator<String> it = list.iterator();
                    while (it.hasNext()) {
                        String txt = it.next();
                        System.out.print(txt + " ");
                    }

                    System.out.println(); // print data line.
                    datarow = tsvfile.readLine();
                }
            } finally {
                table.close();
            }
        } finally {
            tsvfile.close();
        }

        System.out.println();

    } //main()

    /**
     * Scans the whole table and compares each cell's row key with the key
     * built from the TSV line; appends the cell value to the TSV file on a match.
     *
     * @param table   open HBase table to scan
     * @param filekey row key built from the current TSV line
     * @throws IOException if the scan or the file append fails
     */
    private static void scanForRowKey(HTable table, String filekey) throws IOException {
        ResultScanner ss = table.getScanner(new Scan());
        try {
            for (Result r : ss) {
                for (KeyValue kv : r.raw()) {
                    if ((filekey).equals(new String(kv.getRow()))) { //comparing rowkeys file , table
                        System.out.println("file rowkey :" + filekey);
                        System.out.println("table row key" + new String(kv.getRow()));
                        BufferedWriter fbw = new BufferedWriter(
                                new FileWriter("/path/to/the/file/*.tsv", true));
                        try {
                            fbw.write(new String(kv.getValue())); //inserting value tsv file
                            fbw.newLine();
                        } finally {
                            fbw.close();
                        }
                        System.out.println("column value written succesfully");
                    } else {
                        // This branch sits inside the scan loop, so it runs for
                        // every scanned cell whose row key differs from the file
                        // key, not only when the key is absent from the table.
                        System.out.println("rowkey not found :" + new String(kv.getRow()));
                    }
                }
            }
        } finally {
            ss.close();
        }
    }
}

sample record :

dataarray[12]+"-"+dataarray[13] = 3049620139673452544-5172983457411783096

In the HBase table, the rowkey has values in the same format.

I can't share the whole record, as it has 300+ columns.

tsv file size: around 10gb

hbase table : around 10254950 rows.

Any help is appreciated. Thanks in advance.

instead of writing this

if((dataarray[12]+"-"+dataarray[13]).equals(new string(kv.getrow()))){ //comparing rowkeys file , table (doesn't work)

try this

if((dataarray[12]+"-"+dataarray[13]).equals(bytes.tostring(kv.getrow()))){

You have not retrieved the row value correctly.

Try this updated code; it uses Get instead of scanning the HBase table, so it takes less time to run.

    // Replacement for the read loop: looks each TSV row key up with a single
    // Get instead of scanning the table. Needs
    // org.apache.hadoop.hbase.client.Get and org.apache.hadoop.hbase.util.Bytes
    // imported; conf, tsvfile, list and datarow come from the enclosing method.
    HTable table = new HTable(conf, "table name"); // hbase table name
    try {
        while (datarow != null) {
            list.clear();
            String[] dataarray = datarow.split("\t");

            for (String item : dataarray) {
                list.add(item);
            }

            if (dataarray.length > 13) {
                // The key is the same for every column of the line, so one
                // lookup per line is enough.
                String key = dataarray[12] + "-" + dataarray[13];
                Get get = new Get(Bytes.toBytes(key));
                Result r = table.get(get);
                if (r != null && r.size() > 0) {
                    for (KeyValue kv : r.raw()) {
                        System.out.println("file rowkey :" + key);
                        System.out.println("table row key"
                                + Bytes.toString(kv.getRow()));
                        BufferedWriter fbw = new BufferedWriter(new FileWriter(
                                "/path/to/the/file/*.tsv", true));
                        try {
                            // Decode as UTF-8, matching Bytes.toBytes used for the key.
                            fbw.write(Bytes.toString(kv.getValue())); // inserting value tsv file
                            fbw.newLine();
                        } finally {
                            fbw.close();
                        }
                        System.out.println("column value written succesfully");
                    }
                } else {
                    System.out.println("rowkey not found :" + key);
                }
            } else {
                // Short or blank line: indexes 12 and 13 do not exist.
                System.err.println("skipping line with "
                        + dataarray.length + " columns (need at least 14)");
            }

            // Advance to the next line; without this the loop never ends.
            datarow = tsvfile.readLine();
        }
    } finally {
        table.close();
    }

Comments