> ## Documentation Index
> Fetch the complete documentation index at: https://docs.merchantops.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Get Crawl Discoveries

> Get crawl discoveries for a job.

**Authentication Required**: Requires `job:read` permission.

Returns discovered URLs with their classification results for human-in-the-loop (HITL) review.



## OpenAPI

````yaml /openapi/merchantops-public.json get /api/products/crawl-discoveries/{job_id}
# OpenAPI document header: spec version, API identity, and server list.
openapi: '3.1.0'
info:
  title: MerchantOps API
  version: '1.0.0'
  description: >-
    Public API for the MerchantOps product catalog, pricing, and publishing
    surface.
servers:
  - url: https://api.merchantops.ai
    description: Production
# NOTE(review): an empty top-level security list declares no security
# requirement, while the operation description in this document states that
# the `job:read` permission is required. Confirm whether a security scheme
# should be declared here.
security: []
paths:
  # GET operation that lists the URLs discovered by one crawl job. Supports
  # optional boolean/confidence filters and limit/offset pagination.
  /api/products/crawl-discoveries/{job_id}:
    get:
      tags:
        - Products
      summary: Get Crawl Discoveries
      description: >-
        Get crawl discoveries for a job.


        **Authentication Required**: Requires `job:read` permission.


        Returns discovered URLs with their classification results for HITL
        review.
      operationId: get_crawl_discoveries
      parameters:
        # Path parameter identifying the crawl job (the only required input).
        - description: Crawl job ID
          in: path
          name: job_id
          required: true
          schema:
            title: Job Id
            type: string
        # Optional filters. Each accepts a value or null; the spec declares no
        # default for them.
        - description: Filter by product page classification
          in: query
          name: is_product_page
          required: false
          schema:
            anyOf:
              - type: boolean
              - type: 'null'
            description: Filter by product page classification
            title: Is Product Page
        - description: Filter by selection status
          in: query
          name: selected
          required: false
          schema:
            anyOf:
              - type: boolean
              - type: 'null'
            description: Filter by selection status
            title: Selected
        - description: Filter by processed status
          in: query
          name: processed
          required: false
          schema:
            anyOf:
              - type: boolean
              - type: 'null'
            description: Filter by processed status
            title: Processed
        # Confidence threshold, constrained to the closed range 0..1.
        - description: Minimum confidence score
          in: query
          name: min_confidence
          required: false
          schema:
            anyOf:
              - maximum: 1
                minimum: 0
                type: number
              - type: 'null'
            description: Minimum confidence score
            title: Min Confidence
        # Pagination: limit is 1..500 (default 100); offset is >= 0 (default 0).
        - description: Max results to return
          in: query
          name: limit
          required: false
          schema:
            default: 100
            description: Max results to return
            maximum: 500
            minimum: 1
            title: Limit
            type: integer
        - description: Number of results to skip
          in: query
          name: offset
          required: false
          schema:
            default: 0
            description: Number of results to skip
            minimum: 0
            title: Offset
            type: integer
        # NOTE(review): OpenAPI ignores header parameter definitions named
        # Authorization (as well as Accept and Content-Type); credentials are
        # meant to be described through a security scheme instead. Left in
        # place so the rendered page is unchanged.
        - in: header
          name: authorization
          required: false
          schema:
            anyOf:
              - type: string
              - type: 'null'
            title: Authorization
        - in: header
          name: X-Organization-ID
          required: false
          schema:
            anyOf:
              - type: string
              - type: 'null'
            title: X-Organization-Id
        # Optional cookie parameter.
        - in: cookie
          name: stytch_session
          required: false
          schema:
            anyOf:
              - type: string
              - type: 'null'
            title: Stytch Session
      responses:
        # Success: a CrawlDiscoveryListResponse body.
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CrawlDiscoveryListResponse'
          description: Successful Response
        # Request validation failure: an HTTPValidationError body.
        '422':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
          description: Validation Error
components:
  schemas:
    # Envelope returned by the list operation: an `items` array of
    # CrawlDiscoveryResponse objects plus six integer fields. All seven
    # properties are required.
    CrawlDiscoveryListResponse:
      type: object
      title: CrawlDiscoveryListResponse
      description: Response model for list of crawl discoveries.
      required:
        - total
        - product_pages
        - selected_count
        - processed_count
        - items
        - limit
        - offset
      properties:
        items:
          type: array
          title: Items
          items:
            $ref: '#/components/schemas/CrawlDiscoveryResponse'
        limit:
          type: integer
          title: Limit
        offset:
          type: integer
          title: Offset
        processed_count:
          type: integer
          title: Processed Count
        product_pages:
          type: integer
          title: Product Pages
        selected_count:
          type: integer
          title: Selected Count
        total:
          type: integer
          title: Total
    # Schema referenced by the 422 response: an object whose optional `detail`
    # property is an array of ValidationError entries.
    HTTPValidationError:
      type: object
      title: HTTPValidationError
      properties:
        detail:
          type: array
          title: Detail
          items:
            $ref: '#/components/schemas/ValidationError'
    # One discovered URL together with its classification result and import
    # state. Required: id, job_id, url, is_product_page, confidence. The
    # remaining properties are nullable strings or booleans defaulting to false.
    CrawlDiscoveryResponse:
      description: Response model for a single crawl discovery.
      properties:
        classification_reason:
          anyOf:
            - type: string
            - type: 'null'
          description: LLM's reasoning
          title: Classification Reason
        # Bounds mirror the documented 0.0-1.0 range and the constraints on
        # the `min_confidence` query parameter.
        confidence:
          description: Classification confidence (0.0-1.0)
          maximum: 1
          minimum: 0
          title: Confidence
          type: number
        description:
          anyOf:
            - type: string
            - type: 'null'
          description: Page description
          title: Description
        id:
          description: MongoDB ObjectId as string
          title: Id
          type: string
        import_error:
          anyOf:
            - type: string
            - type: 'null'
          description: Import error message
          title: Import Error
        is_product_page:
          description: Whether LLM classified this as a product page
          title: Is Product Page
          type: boolean
        job_id:
          description: Crawl job ID
          title: Job Id
          type: string
        processed:
          default: false
          description: Already imported as product
          title: Processed
          type: boolean
        product_key:
          anyOf:
            - type: string
            - type: 'null'
          description: Product key if imported
          title: Product Key
        selected:
          default: false
          description: User selected for import
          title: Selected
          type: boolean
        title:
          anyOf:
            - type: string
            - type: 'null'
          description: Page title
          title: Title
        url:
          description: Discovered URL
          title: Url
          type: string
      required:
        - id
        - job_id
        - url
        - is_product_page
        - confidence
      title: CrawlDiscoveryResponse
      type: object
    # A single validation failure. `loc`, `msg`, and `type` are required;
    # `ctx` and `input` are optional, and `input` carries no type constraint.
    ValidationError:
      type: object
      title: ValidationError
      required:
        - loc
        - msg
        - type
      properties:
        ctx:
          type: object
          title: Context
        input:
          title: Input
        # Each element of `loc` is either a string or an integer.
        loc:
          type: array
          title: Location
          items:
            anyOf:
              - type: string
              - type: integer
        msg:
          type: string
          title: Message
        type:
          type: string
          title: Error Type

````