테이블 로딩을 위해 텍스트 파일에 수십억 개의 레코드를 생성해야 합니다.
내 대상 테이블 정의는 다음과 같습니다.
CREATE TABLE txnrecords12(
txnno int,
txndate string,
custno int,
amount double,
category string,
product string,
city string,
state string,
spendby string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
데이터를 생성하기 위한 내 코드는 다음과 같습니다.
############!/bin/sh
####### Create file dynamically
if [ ! -d hero_work ]
then
mkdir hero_work
fi
TEMPDIR=$HOME/hero_work
cd $TEMPDIR
touch $TEMPDIR/big_data_file_$$
echo $"string1\n",printf "string1\n",printf "string1\n">>big_data_file_$$
####################################
### Create data file dynamically####
####################################
if [ ! -d hero_work ]
then
mkdir hero_work
fi
TEMPDIR=$HOME/hero_work
cd $TEMPDIR
touch $TEMPDIR/big_data_file_$$
n1='
'
state_factor=$1
category_factor=$2
city_factor=$3
product_factor=$4
hiphen="-"
comma=","
### Write Table Columns Below
Col1="Txnno"
Col2="Txndate"
Col3="Custno"
Col4="Amount"
Col5="Category"
Col6="Product"
Col7="City"
Col8="State"
Col9="Spend_by"
####### Randomvariable declarations
rand1_Date_d="01"
rand2_Date_m="01"
rand5_Date_year="1999"
rand3_Transaction="0014"
rand4_cust_no="01155"
rand6_amount_no="0000"
######Column related variable declaration
var1_col1_txnno=0
var2_col2_txndate=0
var3_col3_custno=0
var4_col4_amount=0
var5_col5_category=0
var6_col6_product=0
var7_col7_city=0
var8_col8_state=0
var9_col9_spendby=0
write_value=${var1_col1_txnno}${comma}${var2_col2_txndate}${comma}${var3_col3_custno}${comma}${var4_col4_amount}${comma}${var5_col5_category}${comma}${var6_col6_product}${comma}${var7_col7_city}${comma}${var8_col8_state}${comma}${var9_col9_spendby}
Column_list=$Col1${comma}Col2${comma}$Col3${comma}$Col4${comma}$Col5${comma}$Col6${comma}$Col7${comma}$Col8${comma}$Col9
echo "$Column_list">big_data_file_$$
#####
####### Array of States
State[0]="UP"
State[1]="MP"
State[2]="Punjab"
State[3]="Delhi"
State[4]="WB"
### Array of Cities
City[0]="ABC"
City[1]="BCD"
City[2]="KJL"
City[3]="CGL"
City[4]="PPL"
#### Array of Products
Product[0]="ICECREAM"
Product[1]="Wheat"
Product[2]="CLOTHES"
Produt[3]="Laptop"
Product[4]="Bags"
Product[5]="Books"
#### Array of Categories
Category[0]="Foods"
Category[1]="Wearings"
Category[2]="Electronics"
###########3 Loop variables were initialized below
var_state_loop=0
var_city_loop=0
var_category_loop=0
var_product_loop=0
while (( var_state_loop -le $state_factor ))
do
if[ $var_state_loop -le 4 ]
then
echo "State loop part starts here.."
$var8_col8_state=${State[$var_state_loop]}
else
echo "State loop part ends here.."
while((var_city_loop -le ${city_factor} ))
do
echo "City Loop starts here"
if[ $var_city_loop -le 4 ]
then
$var7_col7_city=${City[$var_city_loop]}
else
echo "City Loop ends here"
while((var_category_loop -le ${category_factor}))
do
echo "Category loop started from here"
if[ $var_category_loop -le 3 ]
then
$var5_col5_category=${Category[$var_category_loop]}
else
echo"Category loop ended"
while((var_product_loop -le 6))
do
if [ $var_product_loop -le 6 ]
then
$var6_col6_product=${Product[$var_product_loop]}
$var1_col1_txnno=${var8_col8_state}${var7_col7_city}${var5_col5_category}${var6_col6_product}${rand3_Transaction}
while((rand5_Date_year -le 2016))
do
echo "starting date writing"
if[ ${rand1_Date_d} -le 31 -a ${rand2_Date_m} -le 12 ]
then
$var2_col2_txndate=${rand1_Date_d}${hiphen}${rand2_Date_m}${hiphen}${rand5_Date_year}
else
echo "Date part completed"
((ran5_Date_year+=1)))
done
$var3_col3_custno=${var8_col8_state}${var7_col7_city}${var5_col5_category}${var6_col6_product}${rand4_cust_no}
$var4_col4_amount=${rand6_amount_no}
$var9_col9_spendby=${var3_col3_custno}${hiphen}${var7_col7_city}
echo "The product loop finished for one product"
write_value=${var1_col1_txnno}${comma}${var2_col2_txndate}${comma}${var3_col3_custno}${comma}${var4_col4_amount}${comma}${var5_col5_category}${comma}${var6_col6_product}${comma}${var7_col7_city}${comma}${var8_col8_state}${comma}${var9_col9_spendby}
echo ${write_value}>>big_data_file_$$
##### Product end variable declaration
((rand3_Transaction+=1))
((rand6_amount_no+=212))
((var_product_loop+=1))
done
((var_category+=1))
done
((var_city_loop+=1))
done
((var_state_loop+=1))
done
코드를 실행할 때마다 다음 오류가 발생합니다.
line 94: syntax error near unexpected token `then'
biggun.ksh: line 94: ` then'
답변1
몇 가지 오류를 발견했습니다:
- shebang은 #!/bin/sh여야 합니다. 이것들은 모두
#
문법적 오류입니다. - 15-21행은 4-11행과 동일합니다(제거).
- var
$
에 58번째 줄이 없습니다 .Col2
Column_list=
- 59행에서는
echo "$Column_list">big_data_file_$$
이전에 기록된 모든 정보를 지웁니다big_data_file$$
. 로 변경>>
. - 93, 103, 112, 124행을
if[
로 변경해야 합니다if [
. - 114번째 줄에는
echo"Category
공백이 있어야 합니다:echo "Category
. - 129번째 줄에는 클로저가 3개 있는데
)
, 1개를 제거하세요. - 129행, (( )) 구문은 sh(shebang)에서 유효하지 않습니다.
- 여러 터미네이터가 누락되었으며
fi
(최소 5개) 하나가 누락되었을 수 있습니다done
.
피곤해요. 코드를 테스트하고, 정리하고, 숙제를 하세요.