AnyTool/toolbench/tooleval/run_preference.sh
2024-02-23 15:13:06 +08:00

17 lines
664 B
Bash

# Launches eval_preference.py to compare a candidate model against a
# reference model.
#
# All paths below are relative, so they resolve against the current working
# directory: run this script from the directory that contains
# eval_preference.py.
#
# Every setting is exported, so it is also visible in the environment of the
# Python child process.

# Directory holding the converted model predictions (input).
export CONVERTED_ANSWER_PATH=../../data/reproduction_data/model_predictions_converted/
# Directory passed as --save_path (output of this run).
export SAVE_PATH=output/preference_results
# Directory passed as --pass_rate_result_path.
# NOTE(review): the name looks like a typo for PASS_RATE_PATH; it is kept
# unchanged because it is exported and a child process could read it.
export PASS_TARE_PATH=output/pass_rate_results
# Model used as the baseline (--reference_model).
export REFERENCE_MODEL=chatgpt_cot
# Model being evaluated (--output_model).
export CANDIDATE_MODEL=gpt4_dfs_find_api
# Not passed on the command line; only available to the child process via the
# environment. Presumably read by the evaluator to locate the OpenAI API
# keys -- verify against eval_preference.py.
export API_POOL_FILE=../../openai_key.json

# Expansions are double-quoted so that a value containing whitespace or glob
# characters is still delivered to Python as exactly one argument.
python eval_preference.py \
  --converted_answer_path "${CONVERTED_ANSWER_PATH}" \
  --reference_model "${REFERENCE_MODEL}" \
  --output_model "${CANDIDATE_MODEL}" \
  --test_ids ../../data/test_query_ids/ \
  --save_path "${SAVE_PATH}" \
  --pass_rate_result_path "${PASS_TARE_PATH}" \
  --max_eval_threads 20 \
  --use_pass_rate true \
  --evaluate_times 7