{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"VLMEvalKit","owner":"open-compass","isFork":false,"description":"Open-source evaluation toolkit of large vision-language models (LVLMs), support ~100 VLMs, 40+ benchmarks","allTopics":["computer-vision","evaluation","pytorch","gemini","openai","vqa","vit","gpt","multi-modal","clip","claude","openai-api","gpt4","large-language-models","llm","chatgpt","llava","qwen","gpt-4v"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":9,"issueCount":24,"starsCount":1034,"forksCount":144,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,79,25,26,9,25,38,23,28,17,22,10,2,1,10,0,8,14,9,7,9,22,31,12,16,20,12,12,2,4,7,33,9,32,43,53,29,40,4,23,42,15,26,29],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-19T19:15:55.530Z"}},{"type":"Public","name":"opencompass","owner":"open-compass","isFork":false,"description":"OpenCompass is an LLM evaluation platform, supporting a wide range of models (Llama3, Mistral, InternLM2,GPT-4,LLaMa2, Qwen,GLM, Claude, etc) over 100+ datasets.","allTopics":["benchmark","evaluation","openai","llm","chatgpt","large-language-model","llama2","llama3"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":26,"issueCount":180,"starsCount":3777,"forksCount":403,"license":"Apache License 2.0","participation":[18,0,6,7,5,14,12,17,17,12,6,19,11,13,10,16,11,11,9,14,0,6,7,10,11,9,5,2,5,5,8,19,10,13,5,13,5,0,0,5,5,6,6,14,18,6,4,11,11,10,11,7],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-19T10:12:05.049Z"}},{"type":"Public","name":"MMBench","owner":"open-compass","isFork":false,"description":"Official Repo of \"MMBench: Is Your Multi-modal Model an All-around Player?\"","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":3,"starsCount":143,"forksCount":9,"license":"Apache License 2.0","participation":[0,5,0,0,2,0,0,0,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-01T11:44:12.290Z"}},{"type":"Public","name":"hinode","owner":"open-compass","isFork":true,"description":"A clean documentation and blog theme for your Hugo site based on Bootstrap 5","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":51,"license":"MIT License","participation":[39,2,52,69,57,52,64,27,15,7,16,4,21,144,91,32,3,29,25,58,30,14,38,21,15,42,24,26,0,3,28,24,28,22,28,62,45,9,23,26,17,14,29,27,26,10,90,41,31,5,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-01T03:11:24.280Z"}},{"type":"Public","name":"storage","owner":"open-compass","isFork":false,"description":"","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-18T18:25:33.773Z"}},{"type":"Public","name":"CompassBench","owner":"open-compass","isFork":false,"description":"Demo data of CompassBench","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":2,"starsCount":2,"forksCount":2,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,3,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-07T12:39:22.420Z"}},{"type":"Public","name":"CIBench","owner":"open-compass","isFork":false,"description":"Official Repo of \"CIBench: Evaluation of LLMs as Code Interpreter \"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":5,"forksCount":1,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,1,3,0,0,1,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-19T04:51:48.990Z"}},{"type":"Public","name":"GAOKAO-Eval","owner":"open-compass","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":2,"starsCount":88,"forksCount":6,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,10,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-17T10:58:18.281Z"}},{"type":"Public","name":"ANAH","owner":"open-compass","isFork":false,"description":"[ACL 2024] ANAH: Analytical Annotation of Hallucinations in Large Language Models","allTopics":["acl","gpt","llms","hallucination-detection"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":19,"forksCount":1,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,3,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-12T18:57:53.120Z"}},{"type":"Public","name":"MathBench","owner":"open-compass","isFork":false,"description":"[ACL 2024 Findings] MathBench: A Comprehensive Multi-Level Difficulty Mathematics Evaluation Dataset","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":5,"starsCount":77,"forksCount":1,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,4,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,9,2,0,0,0,0,4,0,2,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-12T05:17:51.572Z"}},{"type":"Public","name":"GTA","owner":"open-compass","isFork":false,"description":"Official repository for paper \"GTA: A Benchmark for General Tool Agents\"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":29,"forksCount":3,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-12T03:23:24.351Z"}},{"type":"Public","name":".github","owner":"open-compass","isFork":false,"description":"","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-31T02:27:09.928Z"}},{"type":"Public","name":"DevBench","owner":"open-compass","isFork":false,"description":"A Comprehensive Benchmark for Software Development.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":83,"forksCount":4,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,4,0,6,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-30T13:10:52.870Z"}},{"type":"Public","name":"CodeBench","owner":"open-compass","isFork":false,"description":"","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-21T11:38:31.710Z"}},{"type":"Public","name":"Ada-LEval","owner":"open-compass","isFork":false,"description":"The official implementation of \"Ada-LEval: Evaluating long-context LLMs with length-adaptable benchmarks\"","allTopics":["gpt4","llm","long-context"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":49,"forksCount":2,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-22T09:36:27.993Z"}},{"type":"Public","name":"T-Eval","owner":"open-compass","isFork":false,"description":"[ACL2024] T-Eval: Evaluating Tool Utilization Capability of Large Language Models Step by Step","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":32,"starsCount":209,"forksCount":13,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-03T21:05:37.907Z"}},{"type":"Public","name":"human-eval","owner":"open-compass","isFork":true,"description":"Code for the paper \"Evaluating Large Language Models Trained on Code\"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":332,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-14T11:55:53.553Z"}},{"type":"Public","name":"OpenFinData","owner":"open-compass","isFork":false,"description":"","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":3,"starsCount":32,"forksCount":2,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-08T06:32:20.542Z"}},{"type":"Public","name":"CriticBench","owner":"open-compass","isFork":false,"description":"A comprehensive benchmark for evaluating critique ability of LLMs","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":25,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-02-24T01:31:08.418Z"}},{"type":"Public","name":"code-evaluator","owner":"open-compass","isFork":false,"description":"A multi-language code evaluation tool.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":0,"starsCount":17,"forksCount":6,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-26T04:12:27.117Z"}},{"type":"Public","name":"evalplus","owner":"open-compass","isFork":true,"description":"EvalPlus for rigourous evaluation of LLM-synthesized code","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":102,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-20T08:13:21.996Z"}},{"type":"Public","name":"MixtralKit","owner":"open-compass","isFork":false,"description":"A toolkit for inference and evaluation of 'mixtral-8x7b-32kseqlen' from Mistral AI","allTopics":["moe","mistral","llm"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":12,"starsCount":762,"forksCount":81,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,52,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-15T19:10:55.603Z"}},{"type":"Public","name":"LawBench","owner":"open-compass","isFork":false,"description":"Benchmarking Legal Knowledge of Large Language Models","allTopics":["law","benchmark","llm","chatgpt"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":5,"starsCount":235,"forksCount":34,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-13T06:42:45.191Z"}},{"type":"Public","name":"BotChat","owner":"open-compass","isFork":false,"description":"Evaluating LLMs' multi-round chatting capability via assessing conversations generated by two LLM instances.","allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":1,"starsCount":132,"forksCount":6,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-02T12:38:19.203Z"}},{"type":"Public","name":"pytorch_sphinx_theme","owner":"open-compass","isFork":true,"description":"Sphinx Theme for OpenCompass - Modified from PyTorch","allTopics":[],"primaryLanguage":{"name":"CSS","color":"#563d7c"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":138,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-30T04:20:35.062Z"}}],"repositoryCount":25,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"open-compass repositories"}