diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 0353ccbf..00000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,36 +0,0 @@ - -# Read the Docs configuration file for Sphinx projects -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -# Set the OS, Python version and other tools you might need -build: - os: ubuntu-22.04 - tools: - python: "3.12" - # You can also specify other tool versions: - # nodejs: "20" - # rust: "1.70" - # golang: "1.20" - -# Build documentation in the "docs/" directory with Sphinx -sphinx: - configuration: docs/conf.py - # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs - # builder: "dirhtml" - # Fail on all warnings to avoid broken references - # fail_on_warning: true - -# Optionally build your docs in additional formats such as PDF and ePub -# formats: -# - pdf -# - epub - -# Optional but recommended, declare the Python requirements required -# to build your documentation -# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html -# python: -# install: -# - requirements: docs/requirements.txt diff --git a/README.md b/README.md index d16bdf3e..08fa225d 100644 --- a/README.md +++ b/README.md @@ -156,8 +156,7 @@ Remember to have [Ollama](https://ollama.com/) installed and download the models [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1sEZBonBMGP44CtO6GQTwAlL0BGJXjtfd?usp=sharing) -The documentation for ScrapeGraphAI can be found [here](https://scrapegraph-ai.readthedocs.io/en/latest/). -Check out also the Docusaurus [here](https://docs-oss.scrapegraphai.com/). +The documentation for ScrapeGraphAI can be found [here](https://docs.scrapegraphai.com/introduction). ## 🤝 Contributing @@ -181,10 +180,10 @@ We offer SDKs in both Python and Node.js, making it easy to integrate into your | Python SDK | Python | [scrapegraph-py](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-py) | | Node.js SDK | Node.js | [scrapegraph-js](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js) | -The Official API Documentation can be found [here](https://docs.scrapegraphai.com/). +The Official API Documentation can be found [here](https://docs.scrapegraphai.com/introduction). ## 📈 Telemetry -We collect anonymous usage metrics to enhance our package's quality and user experience. The data helps us prioritize improvements and ensure compatibility. If you wish to opt-out, set the environment variable SCRAPEGRAPHAI_TELEMETRY_ENABLED=false. For more information, please refer to the documentation [here](https://scrapegraph-ai.readthedocs.io/en/latest/scrapers/telemetry.html). +We collect anonymous usage metrics to enhance our package's quality and user experience. The data helps us prioritize improvements and ensure compatibility. If you wish to opt-out, set the environment variable SCRAPEGRAPHAI_TELEMETRY_ENABLED=false. For more information, please refer to the documentation [here](https://docs.scrapegraphai.com/introduction). ## ❤️ Contributors [![Contributors](https://contrib.rocks/image?repo=VinciGit00/Scrapegraph-ai)](https://github.com/VinciGit00/Scrapegraph-ai/graphs/contributors) diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index d0c3cbf1..00000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/chinese.md b/docs/chinese.md index 0fbbd7ca..0ef07e5c 100644 --- a/docs/chinese.md +++ b/docs/chinese.md @@ -159,8 +159,7 @@ print(json.dumps(result, indent=4)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1sEZBonBMGP44CtO6GQTwAlL0BGJXjtfd?usp=sharing) -ScrapeGraphAI 的文档可以在[这里](https://scrapegraph-ai.readthedocs.io/en/latest/)找到。 -还可以查看 Docusaurus [这里](https://docs-oss.scrapegraphai.com/)。 +ScrapeGraphAI 的文档可以在[这里](https://docs.scrapegraphai.com/introduction)找到。 ## 🤝 贡献 @@ -184,7 +183,7 @@ ScrapeGraphAI 的文档可以在[这里](https://scrapegraph-ai.readthedocs.io/e | Python SDK | Python | [scrapegraph-py](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-py) | | Node.js SDK | Node.js | [scrapegraph-js](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js) | -官方 API 文档可以在[这里](https://docs.scrapegraphai.com/)找到。 +官方 API 文档可以在[这里](https://docs.scrapegraphai.com/introduction)找到。 ## 🔥 基准测试 @@ -193,7 +192,7 @@ ScrapeGraphAI 的文档可以在[这里](https://scrapegraph-ai.readthedocs.io/e ![here](assets/histogram.png) ## 📈 遥测 -我们收集匿名使用指标以增强我们包的质量和用户体验。这些数据帮助我们确定改进的优先级并确保兼容性。如果您希望退出,请设置环境变量 SCRAPEGRAPHAI_TELEMETRY_ENABLED=false。有关更多信息,请参阅[这里](https://scrapegraph-ai.readthedocs.io/en/latest/scrapers/telemetry.html)的文档。 +我们收集匿名使用指标以增强我们包的质量和用户体验。这些数据帮助我们确定改进的优先级并确保兼容性。如果您希望退出,请设置环境变量 SCRAPEGRAPHAI_TELEMETRY_ENABLED=false。有关更多信息,请参阅[这里](https://docs.scrapegraphai.com/introduction)的文档。 ## ❤️ 贡献者 [![Contributors](https://contrib.rocks/image?repo=VinciGit00/Scrapegraph-ai)](https://github.com/VinciGit00/Scrapegraph-ai/graphs/contributors) diff --git a/docs/japanese.md b/docs/japanese.md index d975f918..32f6d214 100644 --- a/docs/japanese.md +++ b/docs/japanese.md @@ -159,8 +159,7 @@ print(json.dumps(result, indent=4)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1sEZBonBMGP44CtO6GQTwAlL0BGJXjtfd?usp=sharing) -ScrapeGraphAIのドキュメントは[こちら](https://scrapegraph-ai.readthedocs.io/en/latest/)で見ることができます。 -Docusaurusの[バージョン](https://docs-oss.scrapegraphai.com/)もご覧ください。 +ScrapeGraphAIのドキュメントは[こちら](https://docs.scrapegraphai.com/introduction)で見ることができます。 ## 🤝 貢献 @@ -184,7 +183,7 @@ PythonとNode.jsの両方でSDKを提供しており、プロジェクトに簡 | Python SDK | Python | [scrapegraph-py](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-py) | | Node.js SDK | Node.js | [scrapegraph-js](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js) | -公式APIドキュメントは[こちら](https://docs.scrapegraphai.com/)で見ることができます。 +公式APIドキュメントは[こちら](https://docs.scrapegraphai.com/introduction)で見ることができます。 ## 🔥 ベンチマーク @@ -193,7 +192,7 @@ Firecrawlベンチマーク [Firecrawl benchmark](https://github.com/firecrawl/s ![here](assets/histogram.png) ## 📈 テレメトリ -パッケージの品質とユーザーエクスペリエンスを向上させるために、匿名の使用メトリクスを収集しています。このデータは、改善の優先順位付けと互換性の確保に役立ちます。オプトアウトする場合は、環境変数SCRAPEGRAPHAI_TELEMETRY_ENABLED=falseを設定してください。詳細については、[こちら](https://scrapegraph-ai.readthedocs.io/en/latest/scrapers/telemetry.html)のドキュメントを参照してください。 +パッケージの品質とユーザーエクスペリエンスを向上させるために、匿名の使用メトリクスを収集しています。このデータは、改善の優先順位付けと互換性の確保に役立ちます。オプトアウトする場合は、環境変数SCRAPEGRAPHAI_TELEMETRY_ENABLED=falseを設定してください。詳細については、[こちら](https://docs.scrapegraphai.com/introduction)のドキュメントを参照してください。 ## ❤️ 貢献者 [![Contributors](https://contrib.rocks/image?repo=VinciGit00/Scrapegraph-ai)](https://github.com/VinciGit00/Scrapegraph-ai/graphs/contributors) diff --git a/docs/korean.md b/docs/korean.md index 7f0db4b4..9eea8944 100644 --- a/docs/korean.md +++ b/docs/korean.md @@ -159,8 +159,7 @@ OpenAI, Groq, Azure, Gemini와 같은 API를 통해 다양한 LLM을 사용할 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1sEZBonBMGP44CtO6GQTwAlL0BGJXjtfd?usp=sharing) -ScrapeGraphAI 관련 문서는 [여기](https://scrapegraph-ai.readthedocs.io/en/latest/)에서 확인하실 수 있습니다. -Docusaurus도 [여기](https://docs-oss.scrapegraphai.com/)에서 확인해 보세요. +ScrapeGraphAI 관련 문서는 [여기](https://docs.scrapegraphai.com/introduction)에서 확인하실 수 있습니다. ## 🤝 기여 @@ -184,7 +183,7 @@ Python과 Node.js SDK를 제공하여 프로젝트에 쉽게 통합할 수 있 | Python SDK | Python | [scrapegraph-py](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-py) | | Node.js SDK | Node.js | [scrapegraph-js](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js) | -공식 API 문서는 [여기](https://docs.scrapegraphai.com/)에서 확인할 수 있습니다. +공식 API 문서는 [여기](https://docs.scrapegraphai.com/introduction)에서 확인할 수 있습니다. ## 🔥 벤치마크 @@ -193,7 +192,7 @@ Firecrawl 벤치마크 [Firecrawl benchmark](https://github.com/firecrawl/scrape ![here](assets/histogram.png) ## 📈 텔레메트리 -저희는 패키지의 품질과 사용자 경험을 향상시키기 위해 익명의 사용 지표를 수집합니다. 이 데이터는 개선 사항의 우선순위를 정하고 호환성을 보장하는 데 도움이 됩니다. 옵트아웃하려면 환경 변수 SCRAPEGRAPHAI_TELEMETRY_ENABLED=false를 설정하세요. 자세한 내용은 [여기](https://scrapegraph-ai.readthedocs.io/en/latest/scrapers/telemetry.html)에서 설명서를 참조하세요. +저희는 패키지의 품질과 사용자 경험을 향상시키기 위해 익명의 사용 지표를 수집합니다. 이 데이터는 개선 사항의 우선순위를 정하고 호환성을 보장하는 데 도움이 됩니다. 옵트아웃하려면 환경 변수 SCRAPEGRAPHAI_TELEMETRY_ENABLED=false를 설정하세요. 자세한 내용은 [여기](https://docs.scrapegraphai.com/introduction)에서 설명서를 참조하세요. ## ❤️ 기여자들 [![Contributors](https://contrib.rocks/image?repo=VinciGit00/Scrapegraph-ai)](https://github.com/VinciGit00/Scrapegraph-ai/graphs/contributors) diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index dc1312ab..00000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.https://www.sphinx-doc.org/ - exit /b 1 -) - -if "%1" == "" goto help - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/portuguese.md b/docs/portuguese.md index c68c83ad..a4a5f9b8 100644 --- a/docs/portuguese.md +++ b/docs/portuguese.md @@ -159,8 +159,7 @@ Lembre-se de ter o [Ollama](https://ollama.com/) instalado e baixar os modelos u [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1sEZBonBMGP44CtO6GQTwAlL0BGJXjtfd?usp=sharing) -A documentação do ScrapeGraphAI pode ser encontrada [aqui](https://scrapegraph-ai.readthedocs.io/en/latest/). -Confira também o Docusaurus [aqui](https://docs-oss.scrapegraphai.com/). +A documentação do ScrapeGraphAI pode ser encontrada [aqui](https://docs.scrapegraphai.com/introduction). ## 🤝 Contribuindo @@ -184,7 +183,7 @@ Oferecemos SDKs em Python e Node.js, facilitando a integração em seus projetos | Python SDK | Python | [scrapegraph-py](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-py) | | Node.js SDK | Node.js | [scrapegraph-js](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js) | -A Documentação Oficial da API pode ser encontrada [aqui](https://docs.scrapegraphai.com/). +A Documentação Oficial da API pode ser encontrada [aqui](https://docs.scrapegraphai.com/introduction). ## 🔥 Benchmark @@ -193,7 +192,7 @@ De acordo com o benchmark do Firecrawl [Firecrawl benchmark](https://github.com/ ![here](assets/histogram.png) ## 📈 Telemetria -Coletamos métricas de uso anônimas para melhorar a qualidade e a experiência do usuário do nosso pacote. Os dados nos ajudam a priorizar melhorias e garantir compatibilidade. Se você deseja optar por não participar, defina a variável de ambiente SCRAPEGRAPHAI_TELEMETRY_ENABLED=false. Para mais informações, consulte a documentação [aqui](https://scrapegraph-ai.readthedocs.io/en/latest/scrapers/telemetry.html). +Coletamos métricas de uso anônimas para melhorar a qualidade e a experiência do usuário do nosso pacote. Os dados nos ajudam a priorizar melhorias e garantir compatibilidade. Se você deseja optar por não participar, defina a variável de ambiente SCRAPEGRAPHAI_TELEMETRY_ENABLED=false. Para mais informações, consulte a documentação [aqui](https://docs.scrapegraphai.com/introduction). ## ❤️ Contribuidores [![Contributors](https://contrib.rocks/image?repo=VinciGit00/Scrapegraph-ai)](https://github.com/VinciGit00/Scrapegraph-ai/graphs/contributors) diff --git a/docs/requirements-dev.txt b/docs/requirements-dev.txt deleted file mode 100644 index a8dc6239..00000000 --- a/docs/requirements-dev.txt +++ /dev/null @@ -1,7 +0,0 @@ -sphinx>=7.1.2 -sphinx-rtd-theme>=1.3.0 -myst-parser>=2.0.0 -sphinx-copybutton>=0.5.2 -sphinx-design>=0.5.0 -sphinx-autodoc-typehints>=1.25.2 -sphinx-autoapi>=3.0.0 \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 71252ff7..00000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -sphinx>=7.1.2 - -sphinx-rtd-theme>=1.3.0 -myst-parser>=2.0.0 -sphinx-copybutton>=0.5.2 -sphinx-design>=0.5.0 -sphinx-autodoc-typehints>=1.25.2 -sphinx-autoapi>=3.0.0 -furo>=2024.1.29 \ No newline at end of file diff --git a/docs/russian.md b/docs/russian.md index e8351831..9a0cb11d 100644 --- a/docs/russian.md +++ b/docs/russian.md @@ -159,8 +159,7 @@ print(json.dumps(result, indent=4)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1sEZBonBMGP44CtO6GQTwAlL0BGJXjtfd?usp=sharing) -Документация для ScrapeGraphAI доступна [здесь](https://scrapegraph-ai.readthedocs.io/en/latest/). -Посмотрите также Docusaurus [здесь](https://docs-oss.scrapegraphai.com/). +Документация для ScrapeGraphAI доступна [здесь](https://docs.scrapegraphai.com/introduction). ## 🤝 Участие @@ -184,7 +183,7 @@ print(json.dumps(result, indent=4)) | Python SDK | Python | [scrapegraph-py](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-py) | | Node.js SDK | Node.js | [scrapegraph-js](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js) | -Официальная документация API доступна [здесь](https://docs.scrapegraphai.com/). +Официальная документация API доступна [здесь](https://docs.scrapegraphai.com/introduction). ## 🔥 Бенчмарк @@ -193,7 +192,7 @@ print(json.dumps(result, indent=4)) ![here](assets/histogram.png) ## 📈 Телеметрия -Мы собираем анонимные метрики использования для повышения качества нашего пакета и пользовательского опыта. Данные помогают нам определять приоритеты улучшений и обеспечивать совместимость. Если вы хотите отказаться, установите переменную окружения SCRAPEGRAPHAI_TELEMETRY_ENABLED=false. Для получения дополнительной информации обратитесь к документации [здесь](https://scrapegraph-ai.readthedocs.io/en/latest/scrapers/telemetry.html). +Мы собираем анонимные метрики использования для повышения качества нашего пакета и пользовательского опыта. Данные помогают нам определять приоритеты улучшений и обеспечивать совместимость. Если вы хотите отказаться, установите переменную окружения SCRAPEGRAPHAI_TELEMETRY_ENABLED=false. Для получения дополнительной информации обратитесь к документации [здесь](https://docs.scrapegraphai.com/introduction). ## ❤️ Разработчики программного обеспечения diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index 790cfa15..00000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,41 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# For the full list of built-in configuration values, see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Project information ----------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information - -# -- Path setup -------------------------------------------------------------- - -import os -import sys - -# import all the modules -sys.path.insert(0, os.path.abspath("../../")) - -project = "ScrapeGraphAI" -copyright = "2024, ScrapeGraphAI" -author = "Marco Vinciguerra, , Lorenzo Padoan" - -html_last_updated_fmt = "%b %d, %Y" - -# -- General configuration --------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration - -extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"] - -templates_path = ["_templates"] -exclude_patterns = [] - -# -- Options for HTML output ------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output - -html_theme = "furo" -html_theme_options = { - "source_repository": "https://github.com/VinciGit00/Scrapegraph-ai/", - "source_branch": "main", - "source_directory": "docs/source/", - "navigation_with_keys": True, - "sidebar_hide_name": False, -} diff --git a/docs/source/getting_started/examples.rst b/docs/source/getting_started/examples.rst deleted file mode 100644 index 5696e468..00000000 --- a/docs/source/getting_started/examples.rst +++ /dev/null @@ -1,87 +0,0 @@ -Examples -======== - -Let's suppose you want to scrape a website to get a list of projects with their descriptions. -You can use the `SmartScraperGraph` class to do that. -The following examples show how to use the `SmartScraperGraph` class with OpenAI models and local models. - -OpenAI models -^^^^^^^^^^^^^ - -.. code-block:: python - - import os - from dotenv import load_dotenv - from scrapegraphai.graphs import SmartScraperGraph - from scrapegraphai.utils import prettify_exec_info - - load_dotenv() - - openai_key = os.getenv("OPENAI_APIKEY") - - graph_config = { - "llm": { - "api_key": openai_key, - "model": "openai/gpt-4o", - }, - } - - # ************************************************ - # Create the SmartScraperGraph instance and run it - # ************************************************ - - smart_scraper_graph = SmartScraperGraph( - prompt="List me all the projects with their description.", - # also accepts a string with the already downloaded HTML code - source="https://perinim.github.io/projects/", - config=graph_config - ) - - result = smart_scraper_graph.run() - print(result) - - -Local models -^^^^^^^^^^^^^ - -Remember to have installed in your pc ollama `ollama ` -Remember to pull the right model for LLM and for the embeddings, like: - -.. code-block:: bash - - ollama pull llama3 - ollama pull nomic-embed-text - ollama pull mistral - -After that, you can run the following code, using only your machine resources brum brum brum: - -.. code-block:: python - - from scrapegraphai.graphs import SmartScraperGraph - from scrapegraphai.utils import prettify_exec_info - - graph_config = { - "llm": { - "model": "ollama/mistral", - "temperature": 1, - "format": "json", # Ollama needs the format to be specified explicitly - "model_tokens": 2000, # depending on the model set context length - "base_url": "http://localhost:11434", # set ollama URL of the local host (YOU CAN CHANGE IT, if you have a different endpoint - } - } - - # ************************************************ - # Create the SmartScraperGraph instance and run it - # ************************************************ - - smart_scraper_graph = SmartScraperGraph( - prompt="List me all the projects with their description.", - # also accepts a string with the already downloaded HTML code - source="https://perinim.github.io/projects", - config=graph_config - ) - - result = smart_scraper_graph.run() - print(result) - -To find out how you can customize the `graph_config` dictionary, by using different LLM and adding new parameters, check the `Scrapers` section! diff --git a/docs/source/getting_started/installation.rst b/docs/source/getting_started/installation.rst deleted file mode 100644 index a9fd7626..00000000 --- a/docs/source/getting_started/installation.rst +++ /dev/null @@ -1,48 +0,0 @@ -Installation ------------- - -In the following sections I will guide you through the installation process of the required components -for this project. - -Prerequisites -^^^^^^^^^^^^^ - -- `Python >=3.9 `_ -- `pip `_ -- `Ollama `_ (optional for local models) - - -Install the library -^^^^^^^^^^^^^^^^^^^^ - -The library is available on PyPI, so it can be installed using the following command: - -.. code-block:: bash - - pip install scrapegraphai - -.. important:: - - It is higly recommended to install the library in a virtual environment (conda, venv, etc.) - -If your clone the repository, it is recommended to use a package manager like `uv `_. -To install the library using uv, you can run the following command: - -.. code-block:: bash - - uv pin 3.10 - uv sync - uv build - -.. caution:: - - **Rye** must be installed first by following the instructions on the `official website `_. - -Additionally on Windows when using WSL -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If you are using Windows Subsystem for Linux (WSL) and you are facing issues with the installation of the library, you might need to install the following packages: - -.. code-block:: bash - - sudo apt-get -y install libnss3 libnspr4 libgbm1 libasound2 diff --git a/docs/source/index.rst b/docs/source/index.rst deleted file mode 100644 index 9d0b5a15..00000000 --- a/docs/source/index.rst +++ /dev/null @@ -1,46 +0,0 @@ -.. Scrapegraph-ai documentation master file, created by - sphinx-quickstart on Wed Jan 31 15:38:23 2024. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -.. toctree:: - :maxdepth: 2 - :caption: Introduction - - introduction/overview - introduction/contributing - -.. toctree:: - :maxdepth: 2 - :caption: Getting Started - - getting_started/installation - getting_started/examples - -.. toctree:: - :maxdepth: 2 - :caption: Scrapers - - scrapers/graphs - -.. toctree:: - :maxdepth: 2 - :caption: Modules - - modules/modules - -.. toctree:: - :hidden: - :caption: EXTERNAL RESOURCES - - GitHub - Discord - Linkedin - Twitter - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/source/introduction/contributing.rst b/docs/source/introduction/contributing.rst deleted file mode 100644 index 75f5adab..00000000 --- a/docs/source/introduction/contributing.rst +++ /dev/null @@ -1,13 +0,0 @@ -Contributing -============ - -Hey, you want to contribute? Awesome! -Just fork the repo, make your changes, and send a pull request. -If you're not sure if it's a good idea, open an issue and we'll discuss it. - -Go and check out the `contributing guidelines `__ for more information. - -License -======= -This project is licensed under the MIT license. -See the `LICENSE `__ file for more details. diff --git a/docs/source/introduction/overview.rst b/docs/source/introduction/overview.rst deleted file mode 100644 index 3ff1c1fb..00000000 --- a/docs/source/introduction/overview.rst +++ /dev/null @@ -1,210 +0,0 @@ -.. image:: ../../assets/scrapegraphai_logo.png - :align: center - :width: 50% - :alt: ScrapegraphAI - -Overview -======== - -ScrapeGraphAI is an **open-source** Python library designed to revolutionize **scraping** tools. -In today's data-intensive digital landscape, this library stands out by integrating **Large Language Models** (LLMs) -and modular **graph-based** pipelines to automate the scraping of data from various sources (e.g., websites, local files etc.). - -Simply specify the information you need to extract, and ScrapeGraphAI handles the rest, providing a more **flexible** and **low-maintenance** solution compared to traditional scraping tools. - -For comprehensive documentation and updates, visit our `website `_. - - -Why ScrapegraphAI? -================== - -Traditional web scraping tools often rely on fixed patterns or manual configuration to extract data from web pages. -ScrapegraphAI, leveraging the power of LLMs, adapts to changes in website structures, reducing the need for constant developer intervention. -This flexibility ensures that scrapers remain functional even when website layouts change. - -We support many LLMs including **GPT, Gemini, Groq, Azure, Hugging Face** etc. -as well as local models which can run on your machine using **Ollama**. - -AI Models and Token Limits -========================== - -ScrapGraphAI supports a wide range of AI models from various providers. Each model has a specific token limit, which is important to consider when designing your scraping pipelines. Here's an overview of the supported models and their token limits: - -OpenAI Models -------------- -- GPT-3.5 Turbo (16,385 tokens) -- GPT-3.5 (4,096 tokens) -- GPT-3.5 Turbo Instruct (4,096 tokens) -- GPT-4 Turbo Preview (128,000 tokens) -- GPT-4 Vision Preview (128,000 tokens) -- GPT-4 (8,192 tokens) -- GPT-4 32k (32,768 tokens) -- GPT-4o (128,000 tokens) -- O1 Preview (128,000 tokens) -- O1 Mini (128,000 tokens) - -Azure OpenAI Models -------------------- -- GPT-3.5 Turbo (16,385 tokens) -- GPT-3.5 (4,096 tokens) -- GPT-4 Turbo Preview (128,000 tokens) -- GPT-4 (8,192 tokens) -- GPT-4 32k (32,768 tokens) -- GPT-4o (128,000 tokens) -- O1 Preview (128,000 tokens) -- O1 Mini (128,000 tokens) - -Google AI Models ----------------- -- Gemini Pro (128,000 tokens) -- Gemini 1.5 Flash (128,000 tokens) -- Gemini 1.5 Pro (128,000 tokens) -- Gemini 1.0 Pro (128,000 tokens) - -Anthropic Models ----------------- -- Claude Instant (100,000 tokens) -- Claude 2 (9,000 tokens) -- Claude 2.1 (200,000 tokens) -- Claude 3 (200,000 tokens) -- Claude 3.5 (200,000 tokens) -- Claude 3 Opus (200,000 tokens) -- Claude 3 Sonnet (200,000 tokens) -- Claude 3 Haiku (200,000 tokens) - -Mistral AI Models ------------------ -- Mistral Large Latest (128,000 tokens) -- Open Mistral Nemo (128,000 tokens) -- Codestral Latest (32,000 tokens) -- Open Mistral 7B (32,000 tokens) -- Open Mixtral 8x7B (32,000 tokens) -- Open Mixtral 8x22B (64,000 tokens) -- Open Codestral Mamba (256,000 tokens) - -Ollama Models -------------- -- Command-R (12,800 tokens) -- CodeLlama (16,000 tokens) -- DBRX (32,768 tokens) -- DeepSeek Coder 33B (16,000 tokens) -- Llama2 Series (4,096 tokens) -- Llama3 Series (8,192-128,000 tokens) -- Mistral Models (32,000-128,000 tokens) -- Mixtral 8x22B Instruct (65,536 tokens) -- Phi3 Series (12,800-128,000 tokens) -- Qwen Series (32,000 tokens) - -Hugging Face Models ------------------- -- Grok-1 (8,192 tokens) -- Meta Llama 3 Series (8,192 tokens) -- Google Gemma Series (8,192 tokens) -- Microsoft Phi Series (2,048-131,072 tokens) -- GPT-2 Series (1,024 tokens) -- DeepSeek V2 Series (131,072 tokens) - -Bedrock Models -------------- -- Claude 3 Series (200,000 tokens) -- Llama2 & Llama3 Series (4,096-8,192 tokens) -- Mistral Series (32,768 tokens) -- Titan Embed Text (8,000 tokens) -- Cohere Embed (512 tokens) - -Fireworks Models ---------------- -- Llama V2 7B (4,096 tokens) -- Mixtral 8x7B Instruct (4,096 tokens) -- Llama 3.1 Series (131,072 tokens) -- Mixtral MoE Series (65,536 tokens) - -For a complete and up-to-date list of supported models and their token limits, please refer to the API documentation. - -Understanding token limits is crucial for optimizing your scraping tasks. Larger token limits allow for processing more text in a single API call, which can be beneficial for scraping lengthy web pages or documents. - - -Library Diagram -=============== - -With ScrapegraphAI you can use many already implemented scraping pipelines or create your own. - -The diagram below illustrates the high-level architecture of ScrapeGraphAI: - -.. image:: ../../assets/project_overview_diagram.png - :align: center - :width: 70% - :alt: ScrapegraphAI Overview - -FAQ -=== - -1. **What is ScrapeGraphAI?** - - ScrapeGraphAI is an open-source python library that uses large language models (LLMs) and graph logic to automate the creation of scraping pipelines for websites and various document types. - -2. **How does ScrapeGraphAI differ from traditional scraping tools?** - - Traditional scraping tools rely on fixed patterns and manual configurations, whereas ScrapeGraphAI adapts to website structure changes using LLMs, reducing the need for constant developer intervention. - -3. **Which LLMs are supported by ScrapeGraphAI?** - - ScrapeGraphAI supports several LLMs, including GPT, Gemini, Groq, Azure, Hugging Face, and local models that can run on your machine using Ollama. - -4. **Can ScrapeGraphAI handle different document formats?** - - Yes, ScrapeGraphAI can scrape information from various document formats such as XML, HTML, JSON, and more. - -5. **I get an empty or incorrect output when scraping a website. What should I do?** - - There are several reasons behind this issue, but for most cases, you can try the following: - - - Set the `headless` parameter to `False` in the graph_config. Some javascript-heavy websites might require it. - - - Check your internet connection. Low speed or unstable connection can cause the HTML to not load properly. - - - Try using a proxy server to mask your IP address. Check out the :ref:`Proxy` section for more information on how to configure proxy settings. - - - Use a different LLM model. Some models might perform better on certain websites than others. - - - Set the `verbose` parameter to `True` in the graph_config to see more detailed logs. - - - Visualize the pipeline graphically using :ref:`Burr`. - - If the issue persists, please report it on the GitHub repository. - -6. **How does ScrapeGraphAI handle the context window limit of LLMs?** - - By splitting big websites/documents into chunks with overlaps and applying compression techniques to reduce the number of tokens. If multiple chunks are present, we will have multiple answers to the user prompt, and therefore, we merge them together in the last step of the scraping pipeline. - -7. **How can I contribute to ScrapeGraphAI?** - - You can contribute to ScrapeGraphAI by submitting bug reports, feature requests, or pull requests on the GitHub repository. Join our `Discord `_ community and follow us on social media! - -Sponsors -======== - -.. image:: ../../assets/browserbase_logo.png - :width: 10% - :alt: Browserbase - :target: https://www.browserbase.com/ - -.. image:: ../../assets/serp_api_logo.png - :width: 10% - :alt: Serp API - :target: https://serpapi.com?utm_source=scrapegraphai - -.. image:: ../../assets/transparent_stat.png - :width: 15% - :alt: Stat Proxies - :target: https://dashboard.statproxies.com/?refferal=scrapegraph - -.. image:: ../../assets/scrapedo.png - :width: 11% - :alt: Scrapedo - :target: https://scrape.do - -.. image:: ../../assets/scrapegraph_logo.png - :width: 11% - :alt: ScrapegraphAI - :target: https://scrapegraphai.com diff --git a/docs/source/modules/modules.rst b/docs/source/modules/modules.rst deleted file mode 100644 index d3237dcd..00000000 --- a/docs/source/modules/modules.rst +++ /dev/null @@ -1,9 +0,0 @@ -scrapegraphai -============= - -.. toctree:: - :maxdepth: 4 - - scrapegraphai - - scrapegraphai.helpers.models_tokens diff --git a/docs/source/modules/scrapegraphai.builders.rst b/docs/source/modules/scrapegraphai.builders.rst deleted file mode 100644 index 668ea5bc..00000000 --- a/docs/source/modules/scrapegraphai.builders.rst +++ /dev/null @@ -1,21 +0,0 @@ -scrapegraphai.builders package -============================== - -Submodules ----------- - -scrapegraphai.builders.graph\_builder module --------------------------------------------- - -.. automodule:: scrapegraphai.builders.graph_builder - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: scrapegraphai.builders - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/modules/scrapegraphai.docloaders.rst b/docs/source/modules/scrapegraphai.docloaders.rst deleted file mode 100644 index be66f042..00000000 --- a/docs/source/modules/scrapegraphai.docloaders.rst +++ /dev/null @@ -1,21 +0,0 @@ -scrapegraphai.docloaders package -================================ - -Submodules ----------- - -scrapegraphai.docloaders.chromium module ----------------------------------------- - -.. automodule:: scrapegraphai.docloaders.chromium - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: scrapegraphai.docloaders - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/modules/scrapegraphai.graphs.rst b/docs/source/modules/scrapegraphai.graphs.rst deleted file mode 100644 index 7eca6683..00000000 --- a/docs/source/modules/scrapegraphai.graphs.rst +++ /dev/null @@ -1,133 +0,0 @@ -scrapegraphai.graphs package -============================ - -Submodules ----------- - -scrapegraphai.graphs.abstract\_graph module -------------------------------------------- - -.. automodule:: scrapegraphai.graphs.abstract_graph - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.base\_graph module ---------------------------------------- - -.. automodule:: scrapegraphai.graphs.base_graph - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.csv\_scraper\_graph module ------------------------------------------------ - -.. automodule:: scrapegraphai.graphs.csv_scraper_graph - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.deep\_scraper\_graph module ------------------------------------------------- - -.. automodule:: scrapegraphai.graphs.deep_scraper_graph - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.json\_scraper\_graph module ------------------------------------------------- - -.. automodule:: scrapegraphai.graphs.json_scraper_graph - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.omni\_scraper\_graph module ------------------------------------------------- - -.. automodule:: scrapegraphai.graphs.omni_scraper_graph - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.omni\_search\_graph module ------------------------------------------------ - -.. automodule:: scrapegraphai.graphs.omni_search_graph - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.pdf\_scraper\_graph module ------------------------------------------------ - -.. automodule:: scrapegraphai.graphs.pdf_scraper_graph - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.script\_creator\_graph module --------------------------------------------------- - -.. automodule:: scrapegraphai.graphs.script_creator_graph - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.search\_graph module ------------------------------------------ - -.. automodule:: scrapegraphai.graphs.search_graph - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.smart\_scraper\_graph module -------------------------------------------------- - -.. automodule:: scrapegraphai.graphs.smart_scraper_graph - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.smart\_scraper\_graph\_burr module -------------------------------------------------------- - -.. automodule:: scrapegraphai.graphs.smart_scraper_graph_burr - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.smart\_scraper\_graph\_hamilton module ------------------------------------------------------------ - -.. automodule:: scrapegraphai.graphs.smart_scraper_graph_hamilton - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.speech\_graph module ------------------------------------------ - -.. automodule:: scrapegraphai.graphs.speech_graph - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.graphs.xml\_scraper\_graph module ------------------------------------------------ - -.. automodule:: scrapegraphai.graphs.xml_scraper_graph - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: scrapegraphai.graphs - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/modules/scrapegraphai.helpers.models_tokens.rst b/docs/source/modules/scrapegraphai.helpers.models_tokens.rst deleted file mode 100644 index 6df4f086..00000000 --- a/docs/source/modules/scrapegraphai.helpers.models_tokens.rst +++ /dev/null @@ -1,28 +0,0 @@ -scrapegraphai.helpers.models_tokens module -========================================== - -.. automodule:: scrapegraphai.helpers.models_tokens - :members: - :undoc-members: - :show-inheritance: - -This module contains a comprehensive dictionary of AI models and their corresponding token limits. The `models_tokens` dictionary is organized by provider (e.g., OpenAI, Azure OpenAI, Google AI, etc.) and includes various models with their maximum token counts. - -Example usage: - -.. code-block:: python - - from scrapegraphai.helpers.models_tokens import models_tokens - - # Get the token limit for GPT-4 - gpt4_limit = models_tokens['openai']['gpt-4'] - print(f"GPT-4 token limit: {gpt4_limit}") - - # Check the token limit for a specific model - model_name = "gpt-4o-mini" - if model_name in models_tokens['openai']: - print(f"{model_name} token limit: {models_tokens['openai'][model_name]}") - else: - print(f"{model_name} not found in the models list") - -This information is crucial for users to understand the capabilities and limitations of different AI models when designing their scraping pipelines. diff --git a/docs/source/modules/scrapegraphai.helpers.rst b/docs/source/modules/scrapegraphai.helpers.rst deleted file mode 100644 index 5bcdf457..00000000 --- a/docs/source/modules/scrapegraphai.helpers.rst +++ /dev/null @@ -1,45 +0,0 @@ -scrapegraphai.helpers package -============================= - -Submodules ----------- - -scrapegraphai.helpers.models\_tokens module -------------------------------------------- - -.. automodule:: scrapegraphai.helpers.models_tokens - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.helpers.nodes\_metadata module --------------------------------------------- - -.. automodule:: scrapegraphai.helpers.nodes_metadata - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.helpers.robots module ------------------------------------ - -.. automodule:: scrapegraphai.helpers.robots - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.helpers.schemas module ------------------------------------- - -.. automodule:: scrapegraphai.helpers.schemas - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: scrapegraphai.helpers - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/modules/scrapegraphai.integrations.rst b/docs/source/modules/scrapegraphai.integrations.rst deleted file mode 100644 index a90c8b7a..00000000 --- a/docs/source/modules/scrapegraphai.integrations.rst +++ /dev/null @@ -1,21 +0,0 @@ -scrapegraphai.integrations package -================================== - -Submodules ----------- - -scrapegraphai.integrations.burr\_bridge module ----------------------------------------------- - -.. automodule:: scrapegraphai.integrations.burr_bridge - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: scrapegraphai.integrations - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/modules/scrapegraphai.models.rst b/docs/source/modules/scrapegraphai.models.rst deleted file mode 100644 index f16ad476..00000000 --- a/docs/source/modules/scrapegraphai.models.rst +++ /dev/null @@ -1,101 +0,0 @@ -scrapegraphai.models package -============================ - -Submodules ----------- - -scrapegraphai.models.anthropic module -------------------------------------- - -.. automodule:: scrapegraphai.models.anthropic - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.models.azure\_openai module ------------------------------------------ - -.. automodule:: scrapegraphai.models.azure_openai - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.models.bedrock module ------------------------------------ - -.. automodule:: scrapegraphai.models.bedrock - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.models.deepseek module ------------------------------------- - -.. automodule:: scrapegraphai.models.deepseek - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.models.gemini module ----------------------------------- - -.. automodule:: scrapegraphai.models.gemini - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.models.groq module --------------------------------- - -.. automodule:: scrapegraphai.models.groq - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.models.hugging\_face module ------------------------------------------ - -.. automodule:: scrapegraphai.models.hugging_face - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.models.ollama module ----------------------------------- - -.. automodule:: scrapegraphai.models.ollama - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.models.openai module ----------------------------------- - -.. automodule:: scrapegraphai.models.openai - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.models.openai\_itt module ---------------------------------------- - -.. automodule:: scrapegraphai.models.openai_itt - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.models.openai\_tts module ---------------------------------------- - -.. automodule:: scrapegraphai.models.openai_tts - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: scrapegraphai.models - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/modules/scrapegraphai.nodes.rst b/docs/source/modules/scrapegraphai.nodes.rst deleted file mode 100644 index c89eecfc..00000000 --- a/docs/source/modules/scrapegraphai.nodes.rst +++ /dev/null @@ -1,165 +0,0 @@ -scrapegraphai.nodes package -=========================== - -Submodules ----------- - -scrapegraphai.nodes.base\_node module -------------------------------------- - -.. automodule:: scrapegraphai.nodes.base_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.conditional\_node module --------------------------------------------- - -.. automodule:: scrapegraphai.nodes.conditional_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.fetch\_node module --------------------------------------- - -.. automodule:: scrapegraphai.nodes.fetch_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.generate\_answer\_csv\_node module ------------------------------------------------------- - -.. automodule:: scrapegraphai.nodes.generate_answer_csv_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.generate\_answer\_node module -------------------------------------------------- - -.. automodule:: scrapegraphai.nodes.generate_answer_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.generate\_answer\_omni\_node module -------------------------------------------------------- - -.. automodule:: scrapegraphai.nodes.generate_answer_omni_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.generate\_answer\_pdf\_node module ------------------------------------------------------- - -.. automodule:: scrapegraphai.nodes.generate_answer_pdf_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.generate\_scraper\_node module --------------------------------------------------- - -.. automodule:: scrapegraphai.nodes.generate_scraper_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.get\_probable\_tags\_node module ----------------------------------------------------- - -.. automodule:: scrapegraphai.nodes.get_probable_tags_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.graph\_iterator\_node module ------------------------------------------------- - -.. automodule:: scrapegraphai.nodes.graph_iterator_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.image\_to\_text\_node module ------------------------------------------------- - -.. automodule:: scrapegraphai.nodes.image_to_text_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.merge\_answers\_node module ------------------------------------------------ - -.. automodule:: scrapegraphai.nodes.merge_answers_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.parse\_node module --------------------------------------- - -.. automodule:: scrapegraphai.nodes.parse_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.rag\_node module ------------------------------------- - -.. automodule:: scrapegraphai.nodes.rag_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.robots\_node module ---------------------------------------- - -.. automodule:: scrapegraphai.nodes.robots_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.search\_internet\_node module -------------------------------------------------- - -.. automodule:: scrapegraphai.nodes.search_internet_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.search\_link\_node module ---------------------------------------------- - -.. automodule:: scrapegraphai.nodes.search_link_node - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.search\_node\_with\_context module ------------------------------------------------------- - -.. automodule:: scrapegraphai.nodes.search_node_with_context - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.nodes.text\_to\_speech\_node module -------------------------------------------------- - -.. automodule:: scrapegraphai.nodes.text_to_speech_node - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: scrapegraphai.nodes - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/modules/scrapegraphai.rst b/docs/source/modules/scrapegraphai.rst deleted file mode 100644 index df0fb1a9..00000000 --- a/docs/source/modules/scrapegraphai.rst +++ /dev/null @@ -1,25 +0,0 @@ -scrapegraphai package -===================== - -Subpackages ------------ - -.. toctree:: - :maxdepth: 4 - - scrapegraphai.builders - scrapegraphai.docloaders - scrapegraphai.graphs - scrapegraphai.helpers - scrapegraphai.integrations - scrapegraphai.models - scrapegraphai.nodes - scrapegraphai.utils - -Module contents ---------------- - -.. automodule:: scrapegraphai - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/modules/scrapegraphai.utils.rst b/docs/source/modules/scrapegraphai.utils.rst deleted file mode 100644 index d9100f1e..00000000 --- a/docs/source/modules/scrapegraphai.utils.rst +++ /dev/null @@ -1,93 +0,0 @@ -scrapegraphai.utils package -=========================== - -Submodules ----------- - -scrapegraphai.utils.cleanup\_html module ----------------------------------------- - -.. automodule:: scrapegraphai.utils.cleanup_html - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.utils.convert\_to\_csv module -------------------------------------------- - -.. automodule:: scrapegraphai.utils.convert_to_csv - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.utils.convert\_to\_json module --------------------------------------------- - -.. automodule:: scrapegraphai.utils.convert_to_json - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.utils.parse\_state\_keys module ---------------------------------------------- - -.. automodule:: scrapegraphai.utils.parse_state_keys - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.utils.prettify\_exec\_info module ------------------------------------------------ - -.. automodule:: scrapegraphai.utils.prettify_exec_info - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.utils.proxy\_rotation module ------------------------------------------- - -.. automodule:: scrapegraphai.utils.proxy_rotation - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.utils.research\_web module ----------------------------------------- - -.. automodule:: scrapegraphai.utils.research_web - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.utils.save\_audio\_from\_bytes module ---------------------------------------------------- - -.. automodule:: scrapegraphai.utils.save_audio_from_bytes - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.utils.sys\_dynamic\_import module ------------------------------------------------ - -.. automodule:: scrapegraphai.utils.sys_dynamic_import - :members: - :undoc-members: - :show-inheritance: - -scrapegraphai.utils.token\_calculator module --------------------------------------------- - -.. automodule:: scrapegraphai.utils.token_calculator - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: scrapegraphai.utils - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/scrapers/graph_config.rst b/docs/source/scrapers/graph_config.rst deleted file mode 100644 index e16ccae7..00000000 --- a/docs/source/scrapers/graph_config.rst +++ /dev/null @@ -1,88 +0,0 @@ -.. _Configuration: - -Additional Parameters -===================== - -It is possible to customize the behavior of the graphs by setting some configuration options. -Some interesting ones are: - -- `verbose`: If set to `True`, some debug information will be printed to the console. -- `headless`: If set to `False`, the web browser will be opened on the URL requested and close right after the HTML is fetched. -- `max_results`: The maximum number of results to be fetched from the search engine. Useful in `SearchGraph`. -- `output_path`: The path where the output files will be saved. Useful in `SpeechGraph`. -- `loader_kwargs`: A dictionary with additional parameters to be passed to the `Loader` class, such as `proxy`. -- `burr_kwargs`: A dictionary with additional parameters to enable `Burr` graphical user interface. -- `max_images`: The maximum number of images to be analyzed. Useful in `OmniScraperGraph` and `OmniSearchGraph`. -- `cache_path`: The path where the cache files will be saved. If already exists, the cache will be loaded from this path. -- `additional_info`: Add additional text to default prompts defined in the graphs. -.. _Burr: - -Burr Integration -^^^^^^^^^^^^^^^^ - -`Burr` is an open source python library that allows the creation and management of state machine applications. Discover more about it `here `_. -It is possible to enable a local hosted webapp to visualize the scraping pipelines and the data flow. -First, we need to install the `burr` library as follows: - -.. code-block:: bash - - pip install scrapegraphai[burr] - -and then run the graphical user interface as follows: - -.. code-block:: bash - - burr - -To log your graph execution in the platform, you need to set the `burr_kwargs` parameter in the graph configuration as follows: - -.. code-block:: python - - graph_config = { - "llm":{...}, - "burr_kwargs": { - "project_name": "test-scraper", - "app_instance_id":"some_id", - } - } - -.. _Proxy: - -Proxy Rotation -^^^^^^^^^^^^^^ - -It is possible to rotate the proxy by setting the `proxy` option in the graph configuration. -We provide a free proxy service which is based on `free-proxy `_ library and can be used as follows: - -.. code-block:: python - - graph_config = { - "llm":{...}, - "loader_kwargs": { - "proxy" : { - "server": "broker", - "criteria": { - "anonymous": True, - "secure": True, - "countryset": {"IT"}, - "timeout": 10.0, - "max_shape": 3 - }, - }, - }, - } - -Do you have a proxy server? You can use it as follows: - -.. code-block:: python - - graph_config = { - "llm":{...}, - "loader_kwargs": { - "proxy" : { - "server": "http://your_proxy_server:port", - "username": "your_username", - "password": "your_password", - }, - }, - } diff --git a/docs/source/scrapers/graphs.rst b/docs/source/scrapers/graphs.rst deleted file mode 100644 index ee5f072f..00000000 --- a/docs/source/scrapers/graphs.rst +++ /dev/null @@ -1,13 +0,0 @@ -Graphs -====== - -Graphs are scraping pipelines aimed at solving specific tasks. They are composed by nodes which can be configured individually to address different aspects of the task (fetching data, extracting information, etc.). - -.. toctree:: - :maxdepth: 4 - - types - llm - graph_config - benchmarks - telemetry diff --git a/docs/source/scrapers/llm.rst b/docs/source/scrapers/llm.rst deleted file mode 100644 index 080daeeb..00000000 --- a/docs/source/scrapers/llm.rst +++ /dev/null @@ -1,227 +0,0 @@ -.. _llm: - -LLM -=== - -We support many known LLM models and providers used to analyze the web pages and extract the information requested by the user. Models can be split in **Chat Models** and **Embedding Models** (the latter are mainly used for Retrieval Augmented Generation RAG). -These models are specified inside the graph configuration dictionary and can be used interchangeably, for example by defining a different model for llm and embeddings. - -- **Local Models**: These models are hosted on the local machine and can be used without any API key. -- **API-based Models**: These models are hosted on the cloud and require an API key to access them (eg. OpenAI, Groq, etc). - -.. note:: - - If the emebedding model is not specified, the library will use the default one for that LLM, if available. - -Local Models ------------- - -Currently, local models are supported through Ollama integration. Ollama is a provider of LLM models which can be downloaded from here `Ollama `_. -Let's say we want to use **llama3** as chat model and **nomic-embed-text** as embedding model. We first need to pull them from ollama using: - -.. code-block:: bash - - ollama pull llama3 - ollama pull nomic-embed-text - -Then we can use them in the graph configuration as follows: - -.. code-block:: python - - graph_config = { - "llm": { - "model": "ollama/llama3", - "temperature": 0.0, - "format": "json", - }, - "embeddings": { - "model": "nomic-embed-text", - }, - } - -You can also specify the **base_url** parameter to specify the models endpoint. By default, it is set to http://localhost:11434. This is useful if you are running Ollama on a Docker container or on a different machine. - -If you want to host Ollama in a Docker container, you can use the following command: - -.. code-block:: bash - - docker-compose up -d - docker exec -it ollama ollama pull llama3 - -API-based Models ----------------- - -OpenAI -^^^^^^ - -You can get the API key from `here `_. - -.. code-block:: python - - graph_config = { - "llm": { - "api_key": "OPENAI_API_KEY", - "model": "gpt-3.5-turbo", - }, - } - -If you want to use text to speech models, you can specify the `tts_model` parameter: - -.. code-block:: python - - graph_config = { - "llm": { - "api_key": "OPENAI_API_KEY", - "model": "gpt-3.5-turbo", - "temperature": 0.7, - }, - "tts_model": { - "api_key": "OPENAI_API_KEY", - "model": "tts-1", - "voice": "alloy" - }, - } - -Gemini -^^^^^^ - -You can get the API key from `here `_. - -**Note**: some countries are not supported and therefore it won't be possible to request an API key. A possible workaround is to use a VPN or run the library on Colab. - -.. code-block:: python - - graph_config = { - "llm": { - "api_key": "GEMINI_API_KEY", - "model": "gemini-pro" - }, - } - -Groq -^^^^ - -You can get the API key from `here `_. Groq doesn't support embedding models, so in the following example we are using Ollama one. - -.. code-block:: python - - graph_config = { - "llm": { - "model": "groq/gemma-7b-it", - "api_key": "GROQ_API_KEY", - "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - }, - } - -Azure -^^^^^ - -We can also pass a model instance for the chat model and the embedding model. For Azure, a possible configuration would be: - -.. code-block:: python - - llm_model_instance = AzureChatOpenAI( - openai_api_version="AZURE_OPENAI_API_VERSION", - azure_deployment="AZURE_OPENAI_CHAT_DEPLOYMENT_NAME" - ) - - embedder_model_instance = AzureOpenAIEmbeddings( - azure_deployment="AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME", - openai_api_version="AZURE_OPENAI_API_VERSION", - ) - # Supposing model_tokens are 100K - model_tokens_count = 100000 - graph_config = { - "llm": { - "model_instance": llm_model_instance, - "model_tokens": model_tokens_count, - }, - "embeddings": { - "model_instance": embedder_model_instance - } - } - -Hugging Face Hub -^^^^^^^^^^^^^^^^ - -We can also pass a model instance for the chat model and the embedding model. For Hugging Face, a possible configuration would be: - -.. code-block:: python - - llm_model_instance = HuggingFaceEndpoint( - repo_id="mistralai/Mistral-7B-Instruct-v0.2", - max_length=128, - temperature=0.5, - token="HUGGINGFACEHUB_API_TOKEN" - ) - - embedder_model_instance = HuggingFaceInferenceAPIEmbeddings( - api_key="HUGGINGFACEHUB_API_TOKEN", - model_name="sentence-transformers/all-MiniLM-l6-v2" - ) - - graph_config = { - "llm": { - "model_instance": llm_model_instance - }, - "embeddings": { - "model_instance": embedder_model_instance - } - } - -Anthropic -^^^^^^^^^ - -We can also pass a model instance for the chat model and the embedding model. For Anthropic, a possible configuration would be: - -.. code-block:: python - - embedder_model_instance = HuggingFaceInferenceAPIEmbeddings( - api_key="HUGGINGFACEHUB_API_TOKEN", - model_name="sentence-transformers/all-MiniLM-l6-v2" - ) - - graph_config = { - "llm": { - "api_key": "ANTHROPIC_API_KEY", - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, - "embeddings": { - "model_instance": embedder_model_instance - } - } - -Other LLM models -^^^^^^^^^^^^^^^^ - -We can also pass a model instance for the chat model and the embedding model through the **model_instance** parameter. -This feature enables you to utilize a Langchain model instance. -You will discover the model you require within the provided list: - -- `chat model list `_ -- `embedding model list `_. - -For instance, consider **chat model** Moonshot. We can integrate it in the following manner: - -.. code-block:: python - - from langchain_community.chat_models.moonshot import MoonshotChat - - # The configuration parameters are contingent upon the specific model you select - llm_instance_config = { - "model": "moonshot-v1-8k", - "base_url": "https://api.moonshot.cn/v1", - "moonshot_api_key": "MOONSHOT_API_KEY", - } - - llm_model_instance = MoonshotChat(**llm_instance_config) - graph_config = { - "llm": { - "model_instance": llm_model_instance, - "model_tokens": 5000 - }, - } diff --git a/docs/source/scrapers/telemetry.rst b/docs/source/scrapers/telemetry.rst deleted file mode 100644 index a80eb3b6..00000000 --- a/docs/source/scrapers/telemetry.rst +++ /dev/null @@ -1,78 +0,0 @@ -=============== -Usage Analytics -=============== - -ScrapeGraphAI collects **anonymous** usage data by default to improve the library and guide development efforts. - -**Events Captured** - -We capture events in the following scenarios: - -1. When a ``Graph`` finishes running. -2. When an exception is raised in one of the nodes. - -**Data Collected** - -The data captured is limited to: - -- Operating System and Python version -- A persistent UUID to identify the session, stored in ``~/.scrapegraphai.conf`` - -Additionally, the following properties are collected: - -.. code-block:: python - - properties = { - "graph_name": graph_name, - "llm_model": llm_model_name, - "embedder_model": embedder_model_name, - "source_type": source_type, - "source": source, - "execution_time": execution_time, - "prompt": prompt, - "schema": schema, - "error_node": error_node_name, - "exception": exception, - "response": response, - "total_tokens": total_tokens, - } - -For more details, refer to the `telemetry.py `_ module. - -**Opting Out** - -If you prefer not to participate in telemetry, you can opt out using any of the following methods: - -1. **Programmatically Disable Telemetry**: - - Add the following code at the beginning of your script: - - .. code-block:: python - - from scrapegraphai import telemetry - telemetry.disable_telemetry() - -2. **Configuration File**: - - Set the ``telemetry_enabled`` key to ``false`` in ``~/.scrapegraphai.conf`` under the ``[DEFAULT]`` section: - - .. code-block:: ini - - [DEFAULT] - telemetry_enabled = False - -3. **Environment Variable**: - - - **For a Shell Session**: - - .. code-block:: bash - - export SCRAPEGRAPHAI_TELEMETRY_ENABLED=false - - - **For a Single Command**: - - .. code-block:: bash - - SCRAPEGRAPHAI_TELEMETRY_ENABLED=false python my_script.py - -By following any of these methods, you can easily opt out of telemetry and ensure your usage data is not collected. diff --git a/docs/source/scrapers/types.rst b/docs/source/scrapers/types.rst deleted file mode 100644 index 42613066..00000000 --- a/docs/source/scrapers/types.rst +++ /dev/null @@ -1,225 +0,0 @@ -Types -===== - - -There are several types of graphs available in the library, each with its own purpose and functionality. The most common ones are: - -- **SmartScraperGraph**: one-page scraper that requires a user-defined prompt and a URL (or local file) to extract information using LLM. -- **SearchGraph**: multi-page scraper that only requires a user-defined prompt to extract information from a search engine using LLM. It is built on top of SmartScraperGraph. -- **SpeechGraph**: text-to-speech pipeline that generates an answer as well as a requested audio file. It is built on top of SmartScraperGraph and requires a user-defined prompt and a URL (or local file). -- **ScriptCreatorGraph**: script generator that creates a Python script to scrape a website using the specified library (e.g. BeautifulSoup). It requires a user-defined prompt and a URL (or local file). - -There are also two additional graphs that can handle multiple sources: - -- **SmartScraperMultiGraph**: similar to `SmartScraperGraph`, but with the ability to handle multiple sources. -- **ScriptCreatorMultiGraph**: similar to `ScriptCreatorGraph`, but with the ability to handle multiple sources. - -With the introduction of `GPT-4o`, two new powerful graphs have been created: - -- **OmniScraperGraph**: similar to `SmartScraperGraph`, but with the ability to scrape images and describe them. -- **OmniSearchGraph**: similar to `SearchGraph`, but with the ability to scrape images and describe them. - - -.. note:: - - They all use a graph configuration to set up LLM models and other parameters. To find out more about the configurations, check the :ref:`LLM` and :ref:`Configuration` sections. - - -.. note:: - - We can pass an optional `schema` parameter to the graph constructor to specify the output schema. If not provided or set to `None`, the schema will be generated by the LLM itself. - -OmniScraperGraph -^^^^^^^^^^^^^^^^ - -.. image:: ../../assets/omniscrapergraph.png - :align: center - :width: 90% - :alt: OmniScraperGraph -| - -First we define the graph configuration, which includes the LLM model and other parameters. Then we create an instance of the OmniScraperGraph class, passing the prompt, source, and configuration as arguments. Finally, we run the graph and print the result. -It will fetch the data from the source and extract the information based on the prompt in JSON format. - -.. code-block:: python - - from scrapegraphai.graphs import OmniScraperGraph - - graph_config = { - "llm": {...}, - } - - omni_scraper_graph = OmniScraperGraph( - prompt="List me all the projects with their titles and image links and descriptions.", - source="https://perinim.github.io/projects", - config=graph_config, - schema=schema - ) - - result = omni_scraper_graph.run() - print(result) - -OmniSearchGraph -^^^^^^^^^^^^^^^ - -.. image:: ../../assets/omnisearchgraph.png - :align: center - :width: 80% - :alt: OmniSearchGraph -| - -Similar to OmniScraperGraph, we define the graph configuration, create multiple of the OmniSearchGraph class, and run the graph. -It will create a search query, fetch the first n results from the search engine, run n OmniScraperGraph instances, and return the results in JSON format. - -.. code-block:: python - - from scrapegraphai.graphs import OmniSearchGraph - - graph_config = { - "llm": {...}, - } - - # Create the OmniSearchGraph instance - omni_search_graph = OmniSearchGraph( - prompt="List me all Chioggia's famous dishes and describe their pictures.", - config=graph_config, - schema=schema - ) - - # Run the graph - result = omni_search_graph.run() - print(result) - -SmartScraperGraph & SmartScraperMultiGraph -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. image:: ../../assets/smartscrapergraph.png - :align: center - :width: 90% - :alt: SmartScraperGraph -| - -First we define the graph configuration, which includes the LLM model and other parameters. Then we create an instance of the SmartScraperGraph class, passing the prompt, source, and configuration as arguments. Finally, we run the graph and print the result. -It will fetch the data from the source and extract the information based on the prompt in JSON format. - -.. code-block:: python - - from scrapegraphai.graphs import SmartScraperGraph - - graph_config = { - "llm": {...}, - } - - smart_scraper_graph = SmartScraperGraph( - prompt="List me all the projects with their descriptions", - source="https://perinim.github.io/projects", - config=graph_config, - schema=schema - ) - - result = smart_scraper_graph.run() - print(result) - -**SmartScraperMultiGraph** is similar to SmartScraperGraph, but it can handle multiple sources. We define the graph configuration, create an instance of the SmartScraperMultiGraph class, and run the graph. - -SearchGraph -^^^^^^^^^^^ - -.. image:: ../../assets/searchgraph.png - :align: center - :width: 80% - :alt: SearchGraph -| - -Similar to SmartScraperGraph, we define the graph configuration, create an instance of the SearchGraph class, and run the graph. -It will create a search query, fetch the first n results from the search engine, run n SmartScraperGraph instances, and return the results in JSON format. - - -.. code-block:: python - - from scrapegraphai.graphs import SearchGraph - - graph_config = { - "llm": {...}, - "embeddings": {...}, - } - - # Create the SearchGraph instance - search_graph = SearchGraph( - prompt="List me all the traditional recipes from Chioggia", - config=graph_config, - schema=schema - ) - - # Run the graph - result = search_graph.run() - print(result) - - -SpeechGraph -^^^^^^^^^^^ - -.. image:: ../../assets/speechgraph.png - :align: center - :width: 90% - :alt: SpeechGraph -| - -Similar to SmartScraperGraph, we define the graph configuration, create an instance of the SpeechGraph class, and run the graph. -It will fetch the data from the source, extract the information based on the prompt, and generate an audio file with the answer, as well as the answer itself, in JSON format. - -.. code-block:: python - - from scrapegraphai.graphs import SpeechGraph - - graph_config = { - "llm": {...}, - "tts_model": {...}, - } - - # ************************************************ - # Create the SpeechGraph instance and run it - # ************************************************ - - speech_graph = SpeechGraph( - prompt="Make a detailed audio summary of the projects.", - source="https://perinim.github.io/projects/", - config=graph_config, - schema=schema - ) - - result = speech_graph.run() - print(result) - - -ScriptCreatorGraph & ScriptCreatorMultiGraph -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. image:: ../../assets/scriptcreatorgraph.png - :align: center - :width: 90% - :alt: ScriptCreatorGraph - -First we define the graph configuration, which includes the LLM model and other parameters. -Then we create an instance of the ScriptCreatorGraph class, passing the prompt, source, and configuration as arguments. Finally, we run the graph and print the result. - -.. code-block:: python - - from scrapegraphai.graphs import ScriptCreatorGraph - - graph_config = { - "llm": {...}, - "library": "beautifulsoup4" - } - - script_creator_graph = ScriptCreatorGraph( - prompt="Create a Python script to scrape the projects.", - source="https://perinim.github.io/projects/", - config=graph_config, - schema=schema - ) - - result = script_creator_graph.run() - print(result) - -**ScriptCreatorMultiGraph** is similar to ScriptCreatorGraph, but it can handle multiple sources. We define the graph configuration, create an instance of the ScriptCreatorMultiGraph class, and run the graph. diff --git a/docs/turkish.md b/docs/turkish.md index dcd20649..f8ece689 100644 --- a/docs/turkish.md +++ b/docs/turkish.md @@ -160,8 +160,7 @@ Yerel modelleri kullanmak istiyorsanız, [Ollama](https://ollama.com/) kurulu ol [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1sEZBonBMGP44CtO6GQTwAlL0BGJXjtfd?usp=sharing) -ScrapeGraphAI dokümantasyonuna [buradan](https://scrapegraph-ai.readthedocs.io/en/latest/) ulaşabilirsiniz. -Ayrıca Docusaurus'a [buradan](https://docs-oss.scrapegraphai.com/) göz atın. +ScrapeGraphAI dokümantasyonuna [buradan](https://docs.scrapegraphai.com/introduction) ulaşabilirsiniz. ## 🤝 Katkıda Bulunun @@ -185,7 +184,7 @@ Python ve Node.js için SDK'lar sunuyoruz, böylece projelerinize kolayca entegr | Python SDK | Python | [scrapegraph-py](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-py) | | Node.js SDK | Node.js | [scrapegraph-js](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js) | -Resmi API Dokümantasyonu [burada](https://docs.scrapegraphai.com/) bulunabilir. +Resmi API Dokümantasyonu [burada](https://docs.scrapegraphai.com/introduction) bulunabilir. ## 🔥 Kıyaslama @@ -194,7 +193,7 @@ Firecrawl kıyaslamasına göre [Firecrawl benchmark](https://github.com/firecra ![here](assets/histogram.png) ## 📈 Telemetri -Paketimizin kalitesini ve kullanıcı deneyimini geliştirmek amacıyla anonim kullanım metrikleri topluyoruz. Bu veriler, iyileştirmelere öncelik vermemize ve uyumluluğu sağlamamıza yardımcı olur. İsterseniz, SCRAPEGRAPHAI_TELEMETRY_ENABLED=false ortam değişkenini ayarlayarak devre dışı bırakabilirsiniz. Daha fazla bilgi için lütfen [buraya](https://scrapegraph-ai.readthedocs.io/en/latest/scrapers/telemetry.html) bakın. +Paketimizin kalitesini ve kullanıcı deneyimini geliştirmek amacıyla anonim kullanım metrikleri topluyoruz. Bu veriler, iyileştirmelere öncelik vermemize ve uyumluluğu sağlamamıza yardımcı olur. İsterseniz, SCRAPEGRAPHAI_TELEMETRY_ENABLED=false ortam değişkenini ayarlayarak devre dışı bırakabilirsiniz. Daha fazla bilgi için lütfen [buraya](https://docs.scrapegraphai.com/introduction) bakın. ## ❤️ Katkıda Bulunanlar diff --git a/examples/readme.md b/examples/readme.md index 69adc1ff..dc0af893 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -2,7 +2,7 @@ This directory contains various example implementations of Scrapegraph-ai for different use cases. Each example demonstrates how to leverage the power of Scrapegraph-ai for specific scenarios. -> **Note:** While these examples showcase implementations using OpenAI and Ollama, Scrapegraph-ai supports many other LLM providers! Check out our [documentation](https://docs-oss.scrapegraphai.com/examples) for the full list of supported providers. +> **Note:** While these examples showcase implementations using OpenAI and Ollama, Scrapegraph-ai supports many other LLM providers! Check out our [documentation](https://docs.scrapegraphai.com/introduction) for the full list of supported providers. ## 📚 Available Examples @@ -51,13 +51,13 @@ Each example may have its own specific requirements. Please refer to the individ ## 📚 Additional Resources -- 📖 [Full Documentation](https://docs-oss.scrapegraphai.com/examples) +- 📖 [Full Documentation](https://docs.scrapegraphai.com/introduction) - 💡 [Examples Repository](https://github.com/ScrapeGraphAI/ScrapegraphLib-Examples) - 🤝 [Community Support](https://github.com/ScrapeGraphAI/scrapegraph-ai/discussions) ## 🤔 Need Help? -- Check out our [documentation](https://docs-oss.scrapegraphai.com) +- Check out our [documentation](https://docs.scrapegraphai.com/introduction) - Join our [Discord community](https://discord.gg/scrapegraphai) - Open an [issue](https://github.com/ScrapeGraphAI/scrapegraph-ai/issues) diff --git a/pyproject.toml b/pyproject.toml index a929d3bd..ff8d595d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ dependencies = [ readme = "README.md" homepage = "https://scrapegraphai.com/" repository = "https://github.com/ScrapeGraphAI/Scrapegraph-ai" -documentation = "https://scrapegraph-ai.readthedocs.io/en/latest/" +documentation = "https://docs.scrapegraphai.com/introduction" keywords = [ "scrapegraph", "scrapegraphai", @@ -69,7 +69,6 @@ requires-python = ">=3.12,<4.0" [project.optional-dependencies] burr = ["burr[start]==0.22.1"] -docs = ["sphinx==6.0", "furo==2024.5.6"] nvidia = ["langchain-nvidia-ai-endpoints>=0.1.0"] ocr = [ "surya-ocr>=0.5.0", diff --git a/readthedocs.yml b/readthedocs.yml deleted file mode 100644 index 10e03b40..00000000 --- a/readthedocs.yml +++ /dev/null @@ -1,24 +0,0 @@ -# Read the Docs configuration file for Sphinx projects -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -# Set the OS, Python version and other tools you might need -build: - os: ubuntu-22.04 - tools: - python: "3.9" - jobs: - pre_build: - - sphinx-apidoc -o docs/source/modules scrapegraphai -f - -# Build documentation in the "docs/" directory with Sphinx -sphinx: - configuration: docs/source/conf.py - -# Specify the requirements file -python: - install: - - requirements: requirements.txt - - requirements: requirements-dev.txt diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 9e8072f2..00000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,6 +0,0 @@ -sphinx>=7.1.2 -myst-parser>=2.0.0 -sphinx-copybutton>=0.5.2 -sphinx-design>=0.5.0 -sphinx-autodoc-typehints>=1.25.2 -sphinx-autoapi>=3.0.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 9e8072f2..00000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -sphinx>=7.1.2 -myst-parser>=2.0.0 -sphinx-copybutton>=0.5.2 -sphinx-design>=0.5.0 -sphinx-autodoc-typehints>=1.25.2 -sphinx-autoapi>=3.0.0 \ No newline at end of file diff --git a/tests/README_TESTING.md b/tests/README_TESTING.md index 0296004f..c558d836 100644 --- a/tests/README_TESTING.md +++ b/tests/README_TESTING.md @@ -427,4 +427,4 @@ When adding tests: - [pytest Documentation](https://docs.pytest.org/) - [pytest-cov Documentation](https://pytest-cov.readthedocs.io/) -- [ScrapeGraphAI Documentation](https://scrapegraph-ai.readthedocs.io/) +- [ScrapeGraphAI Documentation](https://docs.scrapegraphai.com/introduction)