import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
# Names of the 784 feature columns: pixel0 ... pixel783.
col = [f'pixel{i}' for i in range(784)]

# Labelled training data; later steps read the `col` columns and 'label'.
df = pd.read_csv('../input/digit-recognizer/train.csv')
# LightGBM training parameters for the 10-class problem.
lgb_params = dict(
    # Task definition: multiclass objective scored with multiclass log-loss.
    objective="multiclass",
    metric="multi_logloss",
    num_class=10,
    # Tree shape and step size.
    max_depth=5,
    num_leaves=15,
    learning_rate=0.1,
    # 1.0 means no row / feature subsampling.
    bagging_fraction=1.0,
    feature_fraction=1.0,
    # 0.0 means no L1 / L2 regularisation.
    lambda_l1=0.0,
    lambda_l2=0.0,
)
# Hold out 10% of the labelled rows for validation / early stopping.
# A fixed random_state keeps the split (and therefore the early-stopping
# point and the final model) reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    df[col], df['label'], test_size=0.1, random_state=42)

lgtrain = lgb.Dataset(X_train, label=Y_train)
# The validation set references the training Dataset so both share the
# same feature binning.
lgtest = lgb.Dataset(X_test, label=Y_test, reference=lgtrain)

# Train for at most 500 rounds. Early stopping and periodic logging are
# passed as callbacks: the `early_stopping_rounds=` / `verbose_eval=`
# keyword arguments were removed from lgb.train() in LightGBM 4.0 and
# raise TypeError there.
lgb_clf = lgb.train(
    lgb_params,
    lgtrain,
    num_boost_round=500,
    valid_sets=[lgtrain, lgtest],
    callbacks=[
        # Stop once the validation metric has not improved for 5 rounds.
        lgb.early_stopping(stopping_rounds=5),
        # Print the evaluation metrics every 10 rounds.
        lgb.log_evaluation(period=10),
    ],
)
# Score the competition test set: predict() yields one score per class for
# each row, and argmax over axis 1 picks the highest-scoring class index.
df = pd.read_csv('../input/digit-recognizer/test.csv')
class_scores = lgb_clf.predict(df[col])
res = class_scores.argmax(axis=1)

# Reuse the sample submission as a template, overwrite its 'Label' column
# with the predictions, and write it out without the index column.
df = pd.read_csv('../input/digit-recognizer/sample_submission.csv')
df['Label'] = res
df.to_csv('submission.csv', index=False)