Merge branch 'hotfix/0.9.10'
mheilman committed Aug 22, 2013
2 parents 917f47f + 16375a2 commit d926572
Showing 4 changed files with 57 additions and 12 deletions.
3 changes: 3 additions & 0 deletions README.rst
@@ -61,6 +61,9 @@ Requirements
 Changelog
 ~~~~~~~~~
 
+- v0.9.10
+
+  + Fixed bug introduced in v0.9.9 that broke "predict" mode.
 
 - v0.9.9
 
   + Automatically generate a result summary file with all results for
12 changes: 8 additions & 4 deletions skll/experiments.py
@@ -388,7 +388,6 @@ def _classify_featureset(jobname, featureset, given_learner, train_path,
                                      '{}.results.json'.format(jobname))
 
     # create a list of dictionaries of the results information
-    learner_result_dicts = []
     learner_result_dict_base = {'train_set_name': train_set_name,
                                 'test_set_name': test_set_name,
                                 'featureset': featureset,
@@ -417,6 +416,10 @@ def _classify_featureset(jobname, featureset, given_learner, train_path,
 def _create_learner_result_dicts(task_results, grid_scores,
                                  learner_result_dict_base):
     res = []
+
+    if not task_results:
+        return res
+
     num_folds = len(task_results)
     accuracy_sum = 0.0
     score_sum = None
@@ -768,9 +771,10 @@ def run_configuration(config_file, local=False, overwrite=True, queue='all.q',
                          '{}'.format(result_dict))
 
     # write out the summary results file
-    summary_file_name = '_'.join(base_name_components) + '_summary.tsv'
-    with open(os.path.join(resultspath, summary_file_name), 'w') as output_file:
-        _write_summary_file(result_json_paths, output_file)
+    if task == 'cross-validate' or task == 'evaluate':
+        summary_file_name = '_'.join(base_name_components) + '_summary.tsv'
+        with open(os.path.join(resultspath, summary_file_name), 'w') as output_file:
+            _write_summary_file(result_json_paths, output_file)
 
 
 def _run_experiment_without_feature(arg_tuple):
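Read together, the two experiments.py changes are the substance of the hotfix: _create_learner_result_dicts now returns early when there are no task results to aggregate, and run_configuration writes the _summary.tsv file only for the 'cross-validate' and 'evaluate' tasks, since 'predict' mode produces no evaluation results to summarize. Below is a minimal standalone sketch of the same guard pattern; the helper names (create_result_dicts, maybe_write_summary) are illustrative, not SKLL's actual internals:

    def create_result_dicts(task_results):
        # 'predict' mode produces no evaluation results, so bail out
        # before computing any per-fold statistics.
        if not task_results:
            return []
        return [{'fold': i, 'accuracy': acc}
                for i, acc in enumerate(task_results)]

    def maybe_write_summary(task, task_results, summary_path):
        # Only tasks that score their predictions get a summary file.
        if task in ('cross-validate', 'evaluate'):
            with open(summary_path, 'w') as output_file:
                for d in create_result_dicts(task_results):
                    output_file.write('{fold}\t{accuracy}\n'.format(**d))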
2 changes: 1 addition & 1 deletion skll/version.py
@@ -23,5 +23,5 @@
 :organization: ETS
 '''
 
-__version__ = '0.9.9'
+__version__ = '0.9.10'
 VERSION = tuple(int(x) for x in __version__.split('.'))
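The unchanged VERSION line keeps the tuple form in sync automatically, so the version bump is a one-line edit. A quick illustrative check in any Python shell:

    >>> __version__ = '0.9.10'
    >>> tuple(int(x) for x in __version__.split('.'))
    (0, 9, 10)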
52 changes: 45 additions & 7 deletions tests/test_skll.py
@@ -114,19 +114,26 @@ def make_cv_folds_data():
         csv_out.write('{},{}\n'.format(ex_id, k))
 
 
-def fill_in_config_paths(config_template_path, xval=True):
+def fill_in_config_paths(config_template_path, task='cross-validate'):
     with open(config_template_path, 'r') as config_template:
         config = _parse_config_file(config_template)
 
     config.set("Input", "train_location", os.path.join(_my_dir, 'train'))
-    for d in ['results', 'log', 'models', 'vocabs', 'predictions']:
+
+    to_fill_in = ['log', 'models', 'vocabs', 'predictions']
+
+    if task == 'evaluate' or task == 'cross-validate':
+        to_fill_in.append('results')
+
+    for d in to_fill_in:
         config.set("Output", d, os.path.join(_my_dir, 'output'))
 
-    if xval:
+    if task == 'cross-validate':
         cv_folds_location = config.get("Input", "cv_folds_location")
         if cv_folds_location:
             config.set("Input", "cv_folds_location", os.path.join(_my_dir, 'train', cv_folds_location))
-    else:
+
+    if task == 'predict' or task == 'evaluate':
         config.set("Input", "test_location", os.path.join(_my_dir, 'test'))
 
     new_config_path = '{}.cfg'.format(re.search(r'^(.*)\.template\.cfg', config_template_path).groups()[0])
@@ -173,7 +180,7 @@ def test_specified_cv_folds():
 
 
 def make_regression_data():
-    num_examples = 1000
+    num_examples = 2000
 
     np.random.seed(1234567890)
     f1 = np.random.rand(num_examples)
@@ -183,7 +190,13 @@
     y = 1.0 * f1 + 1.0 * f2 - 2.0 * f3 + err
 
     with open(os.path.join(_my_dir, 'train', 'test_regression1.jsonlines'), 'w') as f:
-        for i in range(num_examples):
+        for i in range(num_examples / 2):
             ex_id = "EXAMPLE{}".format(i)
             x = {"f1": f1[i], "f2": f2[i], "f3": f3[i]}
             f.write(json.dumps({"y": y[i], "id": ex_id, "x": x}) + '\n')
+
+    with open(os.path.join(_my_dir, 'test', 'test_regression1.jsonlines'), 'w') as f:
+        for i in range(num_examples / 2, num_examples):
+            ex_id = "EXAMPLE{}".format(i)
+            x = {"f1": f1[i], "f2": f2[i], "f3": f3[i]}
+            f.write(json.dumps({"y": y[i], "id": ex_id, "x": x}) + '\n')
@@ -226,6 +239,31 @@ def test_regression1():
     assert abs(np.std(pred) - np.std(y)) < 0.1
 
 
+def test_predict():
+    '''
+    This tests whether predict task runs.
+    '''
+
+    _, y = make_regression_data()
+
+    config_template_path = os.path.join(_my_dir, 'configs', 'test_predict.template.cfg')
+    config_path = fill_in_config_paths(config_template_path, task='predict')
+
+    config_template_path = "test_predict.cfg"
+
+    with open(os.path.join(_my_dir, config_path)) as cfg:
+        run_configuration(cfg, local=True)
+
+    with open(os.path.join(_my_dir, 'test', 'test_regression1.jsonlines')) as test_file:
+        inputs = [x for x in test_file]
+        assert len(inputs) == 1000
+
+    with open(os.path.join(_my_dir, 'output', 'train_test_unscaled_tuned_pearson_predict_test_regression1_RescaledRidge.predictions')) as outfile:
+        reader = csv.DictReader(outfile, dialect=csv.excel_tab)
+        predictions = [x['prediction'] for x in reader]
+        assert len(predictions) == len(inputs)
+
+
 def make_summary_data():
     num_train_examples = 500
     num_test_examples = 100
@@ -257,7 +295,7 @@ def test_summary():
     make_summary_data()
 
     config_template_path = os.path.join(_my_dir, 'configs', 'test_summary.template.cfg')
-    config_path = fill_in_config_paths(config_template_path, xval=False)
+    config_path = fill_in_config_paths(config_template_path, task='evaluate')
 
     with open(config_path, 'r') as cfg:
         run_configuration(cfg, local=True)
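The new test_predict test reads the predictions back with csv.DictReader; as the test shows, the .predictions output is a tab-separated file with a header row containing a 'prediction' column. A small sketch of that read pattern; the function name and example path are made up for illustration:

    import csv

    def read_predictions(path):
        # .predictions files are tab-separated with a header row;
        # for a regression learner the values parse as floats.
        with open(path) as outfile:
            reader = csv.DictReader(outfile, dialect=csv.excel_tab)
            return [float(row['prediction']) for row in reader]

    # e.g. preds = read_predictions('output/my_experiment.predictions')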
