Introduction to the VariantSpark Python API

[1]:
from varspark import VariantsContext
[2]:
# Build the VariantsContext entry point on top of the Spark session.
# NOTE(review): `spark` is not defined anywhere in this notebook; it is
# presumably injected by the PySpark kernel/launcher -- confirm before
# running on a plain Python kernel.
vc = VariantsContext(spark)
Running on Apache Spark version 2.2.1
SparkUI available at http://140.253.176.47:4040
Welcome to
 _    __           _             __  _____                  __
| |  / /___ ______(_)___ _____  / /_/ ___/____  ____ ______/ /__
| | / / __ `/ ___/ / __ `/ __ \/ __/\__ \/ __ \/ __ `/ ___/ //_/
| |/ / /_/ / /  / / /_/ / / / / /_ ___/ / /_/ / /_/ / /  / ,<
|___/\__,_/_/  /_/\__,_/_/ /_/\__//____/ .___/\__,_/_/  /_/|_|
                                      /_/
[3]:
# Import the VCF file; the path is relative to the notebook's working
# directory. `fs` is the object the importance analysis is run on below.
fs = vc.import_vcf('../data/chr22_1000.vcf')
[4]:
# Load the labels from the CSV file (path relative to the working directory).
# The second argument presumably selects the label column -- TODO confirm
# against the varspark docs. Note that the same identifier, '22_16050408',
# is also the top-ranked variable in the recorded output of the last cell.
ls = vc.load_label('../data/chr22-labels.csv', '22_16050408')

Running importance analysis

[5]:
# Run the importance analysis on `fs` against the labels `ls`.
# A fixed seed is passed explicitly; `mtry_fraction` is presumably the
# fraction of variables tried per split -- confirm against the varspark docs.
ia = fs.importance_analysis(
    ls,
    mtry_fraction=0.1,
    seed=13
)
[6]:
# Display the result of the analysis. In the recorded output this is a list
# of ten (variable id, score) tuples, ordered by descending score.
ia.important_variables()
[6]:
[(u'22_16050408', 0.0017817043235329407),
 (u'22_16050678', 0.0010904805815147934),
 (u'22_16051480', 0.0010668521995151252),
 (u'22_16051107', 0.0008082666531272816),
 (u'22_16052838', 0.0006518182467402832),
 (u'22_16053197', 0.0005765782334748871),
 (u'22_16052656', 0.0004967689362074311),
 (u'22_16051882', 0.0004909085098234914),
 (u'22_16053435', 0.0004820979788328268),
 (u'22_16053727', 0.0004199166942675162)]