U
    eLO                     @   s  d Z ddlZddlZddlZddlZddlZddlmZ ddl	m
Z
 ddl	mZ ddlmZ ddlT zddlZW n$ ek
r   ed	 ed
 Y nX ed zRddlm  mZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z! W n. ek
r    ed ed ed Y nX ddddZ"ej#$dej%&dZ'd&e(e(e(e(ej)j*dddZ+eej,ddd Z-G d!d" d"Z.d#d$ Z/e0d%kre/  dS )'zThe LangSAM model for segmenting objects from satellite images using text prompts.
The source code is adapted from the https://github.com/luca-medeiros/lang-segment-anything repository.
Credits to Luca Medeiros for the original implementation.
    N)Image)sam_model_registry)SamPredictor)hf_hub_download   )*zInstalling rasterio...rasterioignore)build_model)box_ops)predict)SLConfig)clean_state_dictzInstalling GroundingDINO...zgroundingdino-pyz5Please restart the kernel and run the notebook again.zDhttps://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pthzDhttps://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pthzDhttps://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth)vit_hZvit_lZvit_bZ
TORCH_HOMEz~/.cache/torch/hub/checkpointscpu)repo_idfilenameckpt_config_filenamedevicereturnc           	      C   sh   t | ||d}t|}t|}|| t | ||d}tj|dd}|jt|d dd |	  |S )a  
    Loads a model from HuggingFace Model Hub.

    Args:
        repo_id (str): Repository ID on HuggingFace Model Hub.
        filename (str): Name of the model file in the repository.
        ckpt_config_filename (str): Name of the config file for the model in the repository.
        device (str): Device to load the model onto. Default is 'cpu'.

    Returns:
        torch.nn.Module: The loaded model.
    )r   r   Zforce_filenamer   )Zmap_locationmodelFstrict)
r   r   fromfiler
   totorchloadload_state_dictr   eval)	r   r   r   r   Zcache_config_fileargsr   Z
cache_file
checkpoint r!   R/home/ankuromar296_gmail_com/.local/lib/python3.8/site-packages/samgeo/text_sam.pyload_model_hf4   s"    

  r#   )imager   c              
   C   sF   t t jdgddt  t dddgddd	gg}|| d
\}}|S )z
    Transforms an image using standard transformations for image-based models.

    Args:
        image (Image): The PIL Image to be transformed.

    Returns:
        torch.Tensor: The transformed image as a tensor.
    i   i5  )max_sizeg
ףp=
?gv/?gCl?gZd;O?gy&1?g?N)TZComposeZRandomResizeZToTensorZ	Normalize)r$   	transformZimage_transformed_r!   r!   r"   transform_imageU   s    
r)   c                
   @   s   e Zd ZdZd'ddZdd Zdd Zd	d
 Zdd Zdd Z	dde
ji ddfddZde
ji ddfddZd(ddZd)d d!Zd*d"d#Zd+d%d&ZdS ),LangSAMzg
    A Language-based Segment-Anything Model (LangSAM) class which combines GroundingDINO and SAM.
    r   c                 C   sZ   t t j rdnd| _|   | | d| _d| _d| _d| _	d| _
d| _d| _dS )zInitialize the LangSAM instance.

        Args:
            model_type (str, optional): The model type. It can be one of the following: vit_h, vit_l, vit_b.
                Defaults to 'vit_h'. See https://bit.ly/3VrpxUh for more details.
        cudar   N)r   r   r+   Zis_availablebuild_groundingdino	build_samsourcer$   masksboxesphraseslogits
prediction)self
model_typer!   r!   r"   __init__p   s    
zLangSAM.__init__c                 C   sH   t | }t|  }tj|}|j|dd |j| jd t|| _	dS )zBuild the SAM model.

        Args:
            model_type (str, optional): The model type. It can be one of the following: vit_h, vit_l, vit_b.
                Defaults to 'vit_h'. See https://bit.ly/3VrpxUh for more details.
        Tr   )r   N)

SAM_MODELSr   r   ZhubZload_state_dict_from_urlr   r   r   r   sam)r4   r5   Zcheckpoint_urlr8   Z
state_dictr!   r!   r"   r-      s    
zLangSAM.build_samc                 C   s"   d}d}d}t |||| j| _dS )zBuild the GroundingDINO model.zShilongLiu/GroundingDINOzgroundingdino_swinb_cogcoor.pthzGroundingDINO_SwinB.cfg.pyN)r#   r   groundingdino)r4   Zckpt_repo_idZckpt_filenamer   r!   r!   r"   r,      s       zLangSAM.build_groundingdinoc                 C   sV   t |}t| j||||| jd\}}}|j\}	}
t|t|	|
|	|
g }|||fS )a  
        Run the GroundingDINO model prediction.

        Args:
            image (Image): Input PIL Image.
            text_prompt (str): Text prompt for the model.
            box_threshold (float): Box threshold for the prediction.
            text_threshold (float): Text threshold for the prediction.

        Returns:
            tuple: Tuple containing boxes, logits, and phrases.
        )r   r$   captionbox_thresholdtext_thresholdr   )	r)   r   r9   r   sizer   Zbox_cxcywh_to_xyxyr   Tensor)r4   r$   text_promptr;   r<   Zimage_transr0   r2   r1   WHr!   r!   r"   predict_dino   s    
zLangSAM.predict_dinoc                 C   s\   t |}| j| | jj||jdd }| jjdd|| jj	dd\}}}|
 S )z
        Run the SAM model prediction.

        Args:
            image (Image): Input PIL Image.
            boxes (torch.Tensor): Tensor of bounding boxes.

        Returns:
            Masks tensor.
        N   F)Zpoint_coordsZpoint_labelsr0   Zmultimask_output)npZasarrayr8   	set_imager'   Zapply_boxes_torchshapeZpredict_torchr   r   r   )r4   r$   r0   Zimage_arrayZtransformed_boxesr/   r(   r!   r!   r"   predict_sam   s    
 zLangSAM.predict_samc                 C   sJ   t |tr@|drt|}tj|s8td| d|| _nd| _dS )zoSet the input image.

        Args:
            image (str): The path to the image file or a HTTP URL.
        httpInput path  does not exist.N)	
isinstancestr
startswithdownload_fileospathexists
ValueErrorr.   )r4   r$   r!   r!   r"   rE      s    

zLangSAM.set_imageN   Fc              	   K   s  t |tr|drt|}tj|s8td| d|| _t	
|D}| d}|j| _|j| _t|ddddddf }W 5 Q R X n|}t|}|| _| ||||\}}}tg }t|dkr| ||}|d}| dkrtd	 dS tj|d
 |d}tt||D ]H\}\}}t |tjrP|  ! "|}||dk|d  "|7 }q$|dk| }|dk	rt#||| jfd|i| || _$|| _%|| _&|| _'|| _(|	r||||fS |
rg }| j%D ](}|  ! }|)|d |d f q|S dS )ap  
        Run both GroundingDINO and SAM model prediction.

        Parameters:
            image (Image): Input PIL Image.
            text_prompt (str): Text prompt for the model.
            box_threshold (float): Box threshold for the prediction.
            text_threshold (float): Text threshold for the prediction.
            output (str, optional): Output path for the prediction. Defaults to None.
            mask_multiplier (int, optional): Mask multiplier for the prediction. Defaults to 255.
            dtype (np.dtype, optional): Data type for the prediction. Defaults to np.uint8.
            save_args (dict, optional): Save arguments for the prediction. Defaults to {}.
            return_results (bool, optional): Whether to return the results. Defaults to False.

        Returns:
            tuple: Tuple containing masks, boxes, phrases, and logits.
        rH   rI   rJ   r   rC   r   N   r   r   No objects found in the image..r   dtyperY   )*rK   rL   rM   rN   rO   rP   rQ   rR   r.   r   openread	transposer'   crsr   	fromarrayrD   arrayr$   rB   r   ZtensorlenrG   Zsqueezenelementprint
zeros_like	enumeratezipr>   r   numpyastypearray_to_imager/   r0   r1   r2   r3   append)r4   r$   r?   r;   r<   outputmask_multiplierrY   	save_argsZreturn_resultsZreturn_coordskwargssrcimage_np	image_pilr0   r2   r1   r/   mask_overlayiboxmaskZboxlistr!   r!   r"   r      sx     


   


 


zLangSAM.predictTc              	   K   s0  ddl }tj|st| t|trHt| tj|d}|	  t|tsZt
dt|D ]\}}tjtj|d }|
rtdt|d ttt| dt| d| d	 tj|| d
}| j||||f||||d| qb|	r,tj|d}t|| |
r,td| d dS )a4  
        Run both GroundingDINO and SAM model prediction for a batch of images.

        Parameters:
            images (list): List of input PIL Images.
            out_dir (str): Output directory for the prediction.
            text_prompt (str): Text prompt for the model.
            box_threshold (float): Box threshold for the prediction.
            text_threshold (float): Text threshold for the prediction.
            mask_multiplier (int, optional): Mask multiplier for the prediction. Defaults to 255.
            dtype (np.dtype, optional): Data type for the prediction. Defaults to np.uint8.
            save_args (dict, optional): Save arguments for the prediction. Defaults to {}.
            merge (bool, optional): Whether to merge the predictions into a single GeoTIFF file. Defaults to True.
        r   Nz*.tifz6images must be a list or a directory to GeoTIFF files.zProcessing image r   z of z: z...z	_mask.tif)rj   rk   rY   rl   z
merged.tifzSaved the merged prediction to .)globrO   rP   rQ   makedirsrK   rL   listjoinsortrR   rd   splitextbasenamerb   zfillr`   r   Zmerge_rasters)r4   Zimagesout_dirr?   r;   r<   rk   rY   rl   mergeverboserm   rv   rr   r$   r|   rj   r!   r!   r"   predict_batchM  sB    


4	
zLangSAM.predict_batch	EPSG:4326c                 K   sd   | j dkrtd dS | j  }t| jf||d|}|dkrPt|| j||S t|| j|| dS )a  Save the bounding boxes to a vector file.

        Args:
            output (str): The path to the output vector file.
            dst_crs (str, optional): The destination CRS. Defaults to "EPSG:4326".
            **kwargs: Additional arguments for boxes_to_vector().
        NPlease run predict() first.)r0   dst_crs)r0   rb   tolistZrowcol_to_xyr.   Zboxes_to_vectorr]   )r4   rj   r   rm   r0   Zcoordsr!   r!   r"   
save_boxes  s    	

zLangSAM.save_boxes   
   offviridis皙?rr   c              	   K   sZ  ddl }ddlm} ddlm} |d | j}|dkrDtd dS t|dkr\td dS |j	|d |
| j |r| jD ]X}|  }|j|d |d f|d |d  |d	 |d  ||d
d}| | q~d|krd|d< d|krd|d< |j
|||d |dk	r|| || |	dk	rV|
rF|j|	f| nt| j|	| j dS )a  Show the annotations (objects with random color) on the input image.

        Args:
            figsize (tuple, optional): The figure size. Defaults to (12, 10).
            axis (str, optional): Whether to show the axis. Defaults to "off".
            cmap (str, optional): The colormap for the annotations. Defaults to "viridis".
            alpha (float, optional): The alpha value for the annotations. Defaults to 0.4.
            add_boxes (bool, optional): Whether to show the bounding boxes. Defaults to True.
            box_color (str, optional): The color for the bounding boxes. Defaults to "r".
            box_linewidth (int, optional): The line width for the bounding boxes. Defaults to 1.
            title (str, optional): The title for the image. Defaults to None.
            output (str, optional): The path to the output image. Defaults to None.
            blend (bool, optional): Whether to show the input image. Defaults to True.
            kwargs (dict, optional): Additional arguments for matplotlib.pyplot.savefig().
        r   Nr	   r   rV   )figsizer   rC   rU   none)Z	linewidthZ	edgecolorZ	facecolorZdpid   Zbbox_inchesZtight)cmapalpha)warningsZmatplotlib.pyplotZpyplotZmatplotlib.patchespatchesfilterwarningsr3   rb   r`   ZfigureZimshowr$   r0   r   rf   Z	RectangleZgcaZ	add_patchtitleaxisZsavefigrh   r.   )r4   r   r   r   r   Z	add_boxesZ	box_colorZbox_linewidthr   rj   blendrm   r   Zpltr   annsrs   rectr!   r!   r"   	show_anns  sJ    





zLangSAM.show_annsc                 K   s   t ||fd|i| dS )ag  Save the result to a vector file.

        Args:
            image (str): The path to the image file.
            output (str): The path to the vector file.
            simplify_tolerance (float, optional): The maximum allowed geometry displacement.
                The higher this value, the smaller the number of vertices in the resulting geometry.
        simplify_toleranceN)raster_to_vector)r4   r$   rj   r   rm   r!   r!   r"   r     s    
zLangSAM.raster_to_vector	SATELLITEc                 K   s   t | f||d|S )a<  Show the interactive map.

        Args:
            basemap (str, optional): The basemap. It can be one of the following: SATELLITE, ROADMAP, TERRAIN, HYBRID.
            out_dir (str, optional): The path to the output directory. Defaults to None.

        Returns:
            leafmap.Map: The map object.
        )basemapr~   )Ztext_sam_gui)r4   r   r~   rm   r!   r!   r"   show_map  s    
zLangSAM.show_map)r   )Nr   )
r   r   r   r   Tr   r   NNT)N)r   N)__name__
__module____qualname____doc__r6   r-   r,   rB   rG   rE   rD   uint8r   r   r   r   r   r   r!   r!   r!   r"   r*   k   sD   
	
s
B
          
P
r*   c                  C   s  t jdd} | jdddd | jdddd | jd	d
tdd | jdd
tdd |  }t|j }| 	d}|j
}|j}W 5 Q R X t }t|d d d d d df }| }|||j|j|j\}	}
}}|
 dkrtd n\tj|d tjd}tt|
D ]:}|
|   }|	|   }||dk|d  7 }q|dkd tj}tjddd|jd |jd d|j ||d	}|!|d W 5 Q R X d S )Nr*   )descriptionz--imageTzpath to the image)requiredhelpz--promptztext promptz--box_thresholdg      ?zbox threshold)defaulttyper   z--text_thresholdztext thresholdrT   rU   r   rV   rW   rX   r   rS   zmask.tifwZGTiff)ZdriverheightwidthcountrY   r]   r'   )"argparseArgumentParseradd_argumentfloat
parse_argsr   rZ   r$   r[   r\   r'   r]   r*   r   r^   copyr   promptr;   r<   ra   rb   rD   rc   Zint64ranger`   r   rf   rg   r   rF   rY   write)parserr   rn   ro   r'   r]   r   rp   Zimage_np_copyr/   r0   r1   r2   rq   rr   rs   rt   dstr!   r!   r"   main  sv             
 
r   __main__)r   )1r   rO   r   r   rf   rD   r   ZPILr   Zsegment_anythingr   r   Zhuggingface_hubr   commonr   ImportErrorrb   Zinstall_packager   Z!groundingdino.datasets.transformsZdatasetsZ
transformsr&   Zgroundingdino.modelsr
   Zgroundingdino.utilr   Zgroundingdino.util.inferencer   Zgroundingdino.util.slconfigr   Zgroundingdino.util.utilsr   r7   environgetrP   
expanduserZ
CACHE_PATHrL   nnModuler#   r>   r)   r*   r   r   r!   r!   r!   r"   <module>   sf   
 
    !   $@
