Using a local model in input_model causes SameFileError in MergeAdapters Pass #1442

Open · samuel100 opened this issue on Oct 30, 2024 · 0 comments · May be fixed by #1485
Assignees: jambayk
Labels: bug (Something isn't working)

samuel100 (Contributor) commented:
The following config errors in the MergeAdapterWeights pass:

{
    "input_model": {
        "type": "HfModel",
        "model_path": "/model",
        "load_kwargs": { "trust_remote_code": true }
        
    },
    "systems": {
        "local_system": {
            "type": "LocalSystem",
            "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ]
        }
    },
    "data_configs": [
        {
            "name": "dataset_default_train",
            "type": "HuggingfaceContainer",
            "load_dataset_config": {
                "params": {
                    "data_name": "json", 
                    "data_files":"kinfey/datasets/datasets.json",
                    "split": "train[:19000]"
                }
            },
            "pre_process_data_config": { "text_template": "### Question: {INSTRUCTION} \n### Answer: {RESPONSE}"}
        },
        {
            "name": "dataset_default_eval",
            "type": "HuggingfaceContainer",
            "load_dataset_config": {
                "params": {
                    "data_name": "json", 
                    "data_files":"kinfey/datasets/datasets.json",
                    "split": "train[19000:19995]"
                }
            },
            "pre_process_data_config": { "text_template": "### Question: {INSTRUCTION} \n### Answer: {RESPONSE}"}
        },
        {
            "name": "gqa_transformer_prompt_dummy_data",
            "type": "TransformersPromptDummyDataContainer",
            "load_dataset_config": { "use_fp16": true, "shared_kv": true, "trust_remote_code": true }
        }
    ],
    "evaluators": {
        "common_evaluator": {
            "metrics": [
                {
                    "name": "latency",
                    "type": "latency",
                    "sub_types": [
                        { "name": "avg", "priority": 1, "goal": { "type": "percent-min-improvement", "value": 20 } },
                        { "name": "max" },
                        { "name": "min" }
                    ],
                    "data_config": "gqa_transformer_prompt_dummy_data",
                    "user_config": { "io_bind": true }
                }
            ]
        }
    },
    "passes": {
        "lora": {
            "type": "LoRA",
            "target_modules": [ "o_proj", "qkv_proj" ],
            "train_data_config": "dataset_default_train",
            "training_args": {
                "per_device_train_batch_size": 1,
                "per_device_eval_batch_size": 1,
                "gradient_accumulation_steps": 4,
                "gradient_checkpointing": false,
                "max_steps": 10,
                "logging_steps": 10,
                "max_grad_norm": 0.3
            },
            "eval_data_config": "dataset_default_eval"
        },
        "merge_adapter_weights": { "type": "MergeAdapterWeights" },
        "awq": { "type": "AutoAWQQuantizer" },
        "builder": { "type": "ModelBuilder", "precision": "int4" },
        "fp2_logits": { "type": "OnnxIOFloat16ToFloat32" },
        "tune_session_params": {
            "type": "OrtSessionParamsTuning",
            "data_config": "gqa_transformer_prompt_dummy_data",
            "io_bind": true,
            "execution_providers_list": [ "CUDAExecutionProvider" ],
            "opt_level_list": [ 0, 1 ],
            "execution_mode_list": [ 0, 1 ]
        }
    },
    "search_strategy": false,
    "cache_dir": ".olive-cache-kinfey",
    "output_dir": "models/phi3-LoRA",
    "host": "local_system",
    "target": "local_system"
}
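
For context, a config like this is typically executed with the Olive CLI (assuming it is saved as config.json; the file name is not given in the issue):

olive run --config config.json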

The error is:

shutil.SameFileError: '/root/.cache/huggingface/modules/transformers_modules/Phi-3-mini-128k-instruct/configuration_phi3.py' and PosixPath('/content/olive-demo/fine-tuning/cache/default_workflow/runs/9d51c530/models/configuration_phi3.py') are the same file
[2024-10-30 16:36:53,864] [WARNING] [engine.py:343:run_accelerator] Failed to run Olive on gpu-cuda.
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/olive/engine/engine.py", line 331, in run_accelerator
    output_footprint = self.run_no_search(input_model_config, input_model_id, accelerator_spec, output_dir)
  File "/usr/local/lib/python3.10/dist-packages/olive/engine/engine.py", line 409, in run_no_search
    should_prune, signal, model_ids = self._run_passes(
  File "/usr/local/lib/python3.10/dist-packages/olive/engine/engine.py", line 673, in _run_passes
    model_config, model_id = self._run_pass(
  File "/usr/local/lib/python3.10/dist-packages/olive/engine/engine.py", line 773, in _run_pass
    output_model_config = host.run_pass(p, input_model_config, output_model_path, pass_search_point)
  File "/usr/local/lib/python3.10/dist-packages/olive/systems/local.py", line 30, in run_pass
    output_model = the_pass.run(model, output_model_path, point)
  File "/usr/local/lib/python3.10/dist-packages/olive/passes/olive_pass.py", line 206, in run
    output_model = self._run_for_config(model, config, output_model_path)
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/olive/passes/pytorch/merge_adapter_weights.py", line 52, in _run_for_config
    model.save_metadata(output_model_path)
  File "/usr/local/lib/python3.10/dist-packages/olive/model/handler/mixin/hf.py", line 86, in save_metadata
    save_model_config(config, output_dir, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/olive/common/hf/utils.py", line 95, in save_model_config
    config.save_pretrained(output_dir, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/transformers/configuration_utils.py", line 466, in save_pretrained
    custom_object_save(self, save_directory, config=self)
  File "/usr/local/lib/python3.10/dist-packages/transformers/dynamic_module_utils.py", line 581, in custom_object_save
    shutil.copy(object_file, dest_file)
  File "/usr/lib/python3.10/shutil.py", line 417, in copy
    copyfile(src, dst, follow_symlinks=follow_symlinks)
  File "/usr/lib/python3.10/shutil.py", line 234, in copyfile
    raise SameFileError("{!r} and {!r} are the same file".format(src, dst))
shutil.SameFileError: '/root/.cache/huggingface/modules/transformers_modules/Phi-3-mini-128k-instruct/configuration_phi3.py' and PosixPath('/content/olive-demo/fine-tuning/cache/default_workflow/runs/9d51c530/models/configuration_phi3.py') are the same file
[2024-10-30 16:36:54,766] [INFO] [engine.py:274:run] Run history for gpu-cuda:
[2024-10-30 16:36:54,773] [INFO] [engine.py:526:dump_run_history] run history:
+------------+-------------------+-------------+----------------+-----------+
| model_id   | parent_model_id   | from_pass   |   duration_sec | metrics   |
+============+===================+=============+================+===========+
| 850e9859   |                   |             |                |           |
+------------+-------------------+-------------+----------------+-----------+
| 09942012   | 850e9859          | LoRA        |        145.119 |           |
+------------+-------------------+-------------+----------------+-----------+
[2024-10-30 16:36:54,773] [DEBUG] [engine.py:289:run] No packaging config provided, skip packaging artifacts
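
The failing call is the final shutil.copy() inside transformers' custom_object_save(): the source in the Hugging Face modules cache and the destination in the Olive run directory resolve to the same file (most likely through a symlink), and shutil.copy() refuses to copy a file onto itself. A minimal standalone sketch of that mechanism (the file created here is a stand-in, not taken from the run above):

import shutil
from pathlib import Path

# Stand-in for the custom config module that transformers tries to copy.
src = Path("configuration_phi3.py")
src.write_text("# custom config module\n")

# Copying a file onto itself is the call that fails inside
# transformers/dynamic_module_utils.py::custom_object_save().
try:
    shutil.copy(src, Path.cwd() / "configuration_phi3.py")
except shutil.SameFileError as err:
    print(err)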

If the input_model is changed to point at a Hugging Face Hub model rather than a local path, the workflow runs successfully.
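(For comparison, the working Hugging Face variant of input_model presumably looks like the following; the repo id is an assumption inferred from the Phi-3-mini-128k-instruct paths in the traceback, since the issue only shows the local-path form.)

    "input_model": {
        "type": "HfModel",
        "model_path": "microsoft/Phi-3-mini-128k-instruct",
        "load_kwargs": { "trust_remote_code": true }
    }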

samuel100 added the bug (Something isn't working) label on Oct 30, 2024
jambayk self-assigned this on Nov 13, 2024
jambayk linked pull request #1485 on Nov 14, 2024 that will close this issue