#varietyjs
1 catatan
82 view
Fri, 14 Oct 2016 10:29 am
Masalah kolom-kolom data yang bervariasi (yang selama ini jadi momok) #alhamdulillah bisa di-handle dengan baik oleh #mongodb. Sampai hari ini sudah ada 6 jenis data dengan berbagai macam bentuk yang bisa masuk ke database. Permasalahan berikutnya adalah mengidentifikasi setiap data ini; langkah awalnya, menurut saya, adalah mengidentifikasi kolom/field-nya terlebih dahulu. Data sudah masuk -> inventarisasi kolom-kolomnya. Nah, menginventarisasi kolom yang sangat banyak ini bisa dilakukan dengan cara seperti di bawah ini.
Command untuk melihat schema collection #mongodb menggunakan #varietyjs. Very #nice, saudara-saudara.
root@newportalkanwil:~# mongo 10.29.254.215/data_mentah -u dataadmin -p GunungMerapi2012 --eval "var collection = 'bulk', persistResults=true, resultsDatabase='data_mentah'" /web/variety.js MongoDB shell version: 3.0.2 connecting to: 10.29.254.215/data_mentah Variety: A MongoDB Schema Analyzer Version 1.5.0, released 14 May 2015 Using collection of "bulk" Using query of { } Using limit of 224060 Using maxDepth of 99 Using sort of { "_id" : -1 } Using outputFormat of "ascii" Using persistResults of true Using resultsDatabase of "data_mentah" Using resultsCollection of "bulkKeys" Using resultsUser of null Using resultsPass of null Using logKeysContinuously of false Using excludeSubkeys of [ ] Using arrayEscape of "XX" Using plugins of [ ] replacing results collection: bulkKeys +---------------------------------------------------------------------------------------------------------------------------------------------+ | key | types | occurrences | percents | | ---------------------------------------------- | --------------------------------------------------- | ----------- | ---------------------- | | JENIS_DATA | String | 224060 | 100.000000000000000000 | | _id | ObjectId | 224060 | 100.000000000000000000 | | cab | String | 201642 | 89.994644291707572847 | | kpp | String | 201642 | 89.994644291707572847 | | npwp | String | 201642 | 89.994644291707572847 | | nama | String | 189952 | 84.777291796840131610 | | tahun | null (71428),String (117591) | 189019 | 84.360885477104346819 | | jum_fp | null (78461),Number (108335),String (2) | 186798 | 83.369633133981963624 | | kd_kwl | String | 186798 | 83.369633133981963624 | | klu | null (1931),String (184867) | 186798 | 83.369633133981963624 | | kpp_skrg | String | 186798 | 83.369633133981963624 | | nama_lawan | null (71428),String (115370) | 186798 | 83.369633133981963624 | | nm_klu | null (1931),String (184867) | 186798 | 83.369633133981963624 | | nm_kpp | String | 186798 | 83.369633133981963624 | | nm_kwl | String | 186798 
| 83.369633133981963624 | | tot_dpp | null (78461),Number (108337) | 186798 | 83.369633133981963624 | | tot_ppn | null (78461),Number (108337) | 186798 | 83.369633133981963624 | | tot_ppnbm | null (78464),Number (108334) | 186798 | 83.369633133981963624 | | NPWP | String | 20292 | 9.056502722485047840 | | CAB | String | 20285 | 9.053378559314468887 | | KLU | String (20023),null (245) | 20268 | 9.045791305900205970 | | KPP | String | 20227 | 9.027492635901097628 | | kode_kanwil | String | 14844 | 6.625011157725609223 | | kode_kpp_adm | String | 14680 | 6.551816477729179411 | | KPP_ADM | String | 11845 | 5.286530393644559567 | | 1 | null | 10491 | 4.682227974649647351 | | COUNT(NILAI) | Number | 10491 | 4.682227974649647351 | | DETAIL | String | 10491 | 4.682227974649647351 | | JENIS_WP | String | 10491 | 4.682227974649647351 | | KATEGORI | String | 10491 | 4.682227974649647351 | | KWL | String | 10491 | 4.682227974649647351 | | NAMAWP | String | 10491 | 4.682227974649647351 | | NAMA_ASSIGN_AR | String | 10491 | 4.682227974649647351 | | NIP_ASSIGN_AR | String | 10491 | 4.682227974649647351 | | NO_ANALISIS | String | 10491 | 4.682227974649647351 | | NPWPT | String | 10491 | 4.682227974649647351 | | OUTSTANDING (AKHIR-USUL) | String (9791),Number (700) | 10491 | 4.682227974649647351 | | POTENSI_AKHIR | String (9791),Number (700) | 10491 | 4.682227974649647351 | | POTENSI_USUL | null | 10491 | 4.682227974649647351 | | SUM(NILAI) | null | 10491 | 4.682227974649647351 | | THPJ | String | 10491 | 4.682227974649647351 | | WK | String (10489),null (2) | 10491 | 4.682227974649647351 | | KODE_KANWIL | String | 9777 | 4.363563331250557731 | | NAMA_KANWIL | String | 9777 | 4.363563331250557731 | | NM_KLU | String (9532),null (245) | 9777 | 4.363563331250557731 | | ALAMAT_LAWAN | null (2186),String (6261) | 8447 | 3.769972328840489162 | | JMLBPOT | null (2416),Number (6031) | 8447 | 3.769972328840489162 | | KPP_SKRG | String | 8447 | 3.769972328840489162 | | NAMA_KPP | String | 
8447 | 3.769972328840489162 | | NAMA_LAWAN | null (1888),String (6559) | 8447 | 3.769972328840489162 | | NAMA_PEMOTONG | String | 8447 | 3.769972328840489162 | | TAHUN_PAJAK | null (1888),String (6559) | 8447 | 3.769972328840489162 | | TOT_DPP | null (2416),Number (6031) | 8447 | 3.769972328840489162 | | TOT_PPH | null (2416),Number (6031) | 8447 | 3.769972328840489162 | | est_hrg_kend_faktur | null (4084),Number (3074),String (27) | 7185 | 3.206730340087476705 | | nama_wp | String | 6215 | 2.773810586450057958 | | est_hrg_properti_sid | null (4447),Number (850) | 5297 | 2.364098902079800002 | | dpp | NumberLong (2221),Number (2548),String (4) | 4773 | 2.130232973310720368 | | nama_pembeli | String | 4773 | 2.130232973310720368 | | nama_penjual | String | 4773 | 2.130232973310720368 | | ppn | Number | 4773 | 2.130232973310720368 | | nilai_aset_njop_pbb | null (4594),String (36),Number (69) | 4699 | 2.097206105507453344 | | est_hrg_properti_faktur | null (2847),Number (1786) | 4633 | 2.067749709899134203 | | est_njkb_kend_samsat | null (4504),Number (118),String (11) | 4633 | 2.067749709899134203 | | jml_kapal_ikan | null (4604),String (27),Number (2) | 4633 | 2.067749709899134203 | | nilai_obligasi | null | 4633 | 2.067749709899134203 | | nilai_selisih_piutang_sahamVSmodal_utang_pengh | null | 4633 | 2.067749709899134203 | | total_nilai_saham_ahu | NumberLong (733),null (3823),String (3),Number (74) | 4633 | 2.067749709899134203 | | total_saham_ksei | null (3385),Number (1216),String (32) | 4633 | 2.067749709899134203 | | wp_tltd | String (2738),null (1010) | 3748 | 1.672766223333035818 | | faktur_pengganti | null (2545),String (7) | 2552 | 1.138980630188342502 | | masa_pajak | String | 2552 | 1.138980630188342502 | | no_seri_faktur | String | 2552 | 1.138980630188342502 | | npwp_penjual | String | 2552 | 1.138980630188342502 | | pembetulan | String | 2552 | 1.138980630188342502 | | ppnbm | Number | 2552 | 1.138980630188342502 | | tahun_pajak | String | 2552 | 
1.138980630188342502 | | tanggal_faktur | String | 2552 | 1.138980630188342502 | | alamat_wp | String | 2467 | 1.101044363117022140 | | bulan_pajak | String | 2401 | 1.071587967508702999 | | cab_pemotong | String | 2401 | 1.071587967508702999 | | jenis_transaksi | String | 2401 | 1.071587967508702999 | | jml_lembar_saham_ksei | Number (2363),String (38) | 2401 | 1.071587967508702999 | | kpp_pemotong | String | 2401 | 1.071587967508702999 | | nama_pemotong | String | 2401 | 1.071587967508702999 | | npwp_pemotong | String | 2401 | 1.071587967508702999 | | thn_2012 | null (2361),Number (39),String (1) | 2401 | 1.071587967508702999 | | thn_2013 | null (2392),Number (9) | 2401 | 1.071587967508702999 | | thn_2014 | null (1939),Number (459),String (3) | 2401 | 1.071587967508702999 | | thn_2015 | Number (1856),null (511),String (34) | 2401 | 1.071587967508702999 | | alamat | String | 2269 | 1.012675176292064716 | | est_hrg_properti_FP | Number | 2221 | 0.991252343122377977 | | jumlah_faktur | Number | 2221 | 0.991252343122377977 | | identitas_pengirim | String | 2105 | 0.939480496295635126 | | jenis_data | String | 2105 | 0.939480496295635126 | | jenis_dokumen | String | 2105 | 0.939480496295635126 | | keterangan_sumber | String (2094),null (11) | 2105 | 0.939480496295635126 | | kode_jns_dokumen | String | 2105 | 0.939480496295635126 | | kode_sumber | String | 2105 | 0.939480496295635126 | | kota_alamat | String (1652),null (453) | 2105 | 0.939480496295635126 | | merk | String | 2105 | 0.939480496295635126 | | nama_kpp_adm | String | 2105 | 0.939480496295635126 | | nama_pengirim | String | 2105 | 0.939480496295635126 | | nama_sumber | String | 2105 | 0.939480496295635126 | | nilai_data | Number | 2105 | 0.939480496295635126 | | nomor_alket | String | 2105 | 0.939480496295635126 | | nomor_dokumen | String | 2105 | 0.939480496295635126 | | tahun_data | String | 2105 | 0.939480496295635126 | | KATEGORI_GUNGGUNG | null (473),String (857) | 1330 | 0.593591002410068680 | | 
KD_KPP_ADM | String | 1330 | 0.593591002410068680 | | KLU_2_DGT | String (1328),null (2) | 1330 | 0.593591002410068680 | | NAMA | String | 1330 | 0.593591002410068680 | | TAHUN | null (473),String (857) | 1330 | 0.593591002410068680 | | TOT_DPP_GUNGGUNG | null (473),Number (857) | 1330 | 0.593591002410068680 | | TOT_PENYERAHAN | null (473),Number (857) | 1330 | 0.593591002410068680 | | 2010 | null (1187),String (85) | 1272 | 0.567705078996697310 | | 2011 | null (1171),String (101) | 1272 | 0.567705078996697310 | | 2012 | null (1160),String (112) | 1272 | 0.567705078996697310 | | 2013 | null (1153),String (119) | 1272 | 0.567705078996697310 | | 2014 | String (855),null (417) | 1272 | 0.567705078996697310 | | Cab | String | 1272 | 0.567705078996697310 | | KPP_adm | String | 1272 | 0.567705078996697310 | | Kode_Kanwil | String | 1272 | 0.567705078996697310 | | NAMA PEMEGANG SAHAM | String | 1272 | 0.567705078996697310 | | NAMA PERUSAHAAN | String | 1272 | 0.567705078996697310 | | NO AKTA | String | 1272 | 0.567705078996697310 | | NOTARIS | String | 1272 | 0.567705078996697310 | | NPWP1 | String | 1272 | 0.567705078996697310 | | Nilai Saham | String | 1272 | 0.567705078996697310 | | TANGGAL AKTA | String | 1272 | 0.567705078996697310 | | URUT | Number | 1272 | 0.567705078996697310 | | npwp_all | String | 885 | 0.394983486566098385 | | sts_jeniswp | String | 885 | 0.394983486566098385 | | 2015 | String (841),null (13) | 854 | 0.381147906810675730 | | 2016 | String (840),null (14) | 854 | 0.381147906810675730 | | Selisih | String | 854 | 0.381147906810675730 | | WP | String | 854 | 0.381147906810675730 | | alamat_agunan | String | 664 | 0.296349192180665910 | | bukti_milik | String | 664 | 0.296349192180665910 | | jenis_agunan | String | 664 | 0.296349192180665910 | | nama_debitur | String | 664 | 0.296349192180665910 | | pemilik_agunan | String | 664 | 0.296349192180665910 | | bahan_bakar | String | 164 | 0.073194679996429535 | | bbnpk | Number | 164 | 
0.073194679996429535 | | cc | Number | 164 | 0.073194679996429535 | | est_njkb | Number | 164 | 0.073194679996429535 | | kabkot | String | 164 | 0.073194679996429535 | | kode_kpp | String | 164 | 0.073194679996429535 | | merek | String | 164 | 0.073194679996429535 | | nama_mfwp | String | 164 | 0.073194679996429535 | | nik | String | 164 | 0.073194679996429535 | | nopol | String | 164 | 0.073194679996429535 | | pkbpk | Number | 164 | 0.073194679996429535 | | provinsi | String | 164 | 0.073194679996429535 | | tbt | Number | 164 | 0.073194679996429535 | | tgl_bayar | String | 164 | 0.073194679996429535 | | tipe_kendaraan | String | 164 | 0.073194679996429535 | | alamat_objek_pajak | String | 66 | 0.029456395608319200 | | kanwil_domisili_sppt | String | 66 | 0.029456395608319200 | | kpp_domisili_sppt | String | 66 | 0.029456395608319200 | | luas_bangunan_m2 | Number | 66 | 0.029456395608319200 | | luas_bumi_m2 | Number | 66 | 0.029456395608319200 | | luas_sit_tanaman_rp | Number | 66 | 0.029456395608319200 | | luas_tanaman_m2 | Number | 66 | 0.029456395608319200 | | nama_kanwil_lokasi | String | 66 | 0.029456395608319200 | | nama_kpp_lokasi | String | 66 | 0.029456395608319200 | | njop_bangunan_rp | NumberLong | 66 | 0.029456395608319200 | | njop_bumi_rp | Number | 66 | 0.029456395608319200 | | nop_sppt_pbb_2014 | String | 66 | 0.029456395608319200 | | alamat_pemilik | String | 38 | 0.016959742926001962 | | gt | Number | 38 | 0.016959742926001962 | | nama_kapal | String | 38 | 0.016959742926001962 | | nama_pemilik | String | 38 | 0.016959742926001962 | | KDKWL | String | 24 | 0.010711416584843345 | | NAMA_WP | String | 24 | 0.010711416584843345 | | NMKPP | String | 24 | 0.010711416584843345 | | NMKWL | String | 24 | 0.010711416584843345 | | TOT_DPP_NPWP_000 | Number (17),String (7) | 24 | 0.010711416584843345 | +---------------------------------------------------------------------------------------------------------------------------------------------+