U
    Khn                     @   s  d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dlZd dl	Z	d dl
mZ d dlZd dlZd dlT d dlZd dlmZ d d	lmZmZ d dlZd
ddiddiddiddiddiddiddiddiddiddiddigiZdHddZdIddZdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Z d.d/ Z!d0d1 Z"d2d3 Z#d4d5 Z$d6d7 Z%d8d9 Z&d:d; Z'd<d= Z(d>d? Z)d@dA Z*dBdC Z+dDdE Z,dFdG Z-dS )J    )OpenAIN)tqdm)urlparse)HTTPBasicAuth)Image)*)fuzz)datetime	timedelta$orZcategoryUrizdmoz/Businesszdmoz/Healthz	dmoz/Homezdmoz/Recreationzdmoz/Societyzdmoz/Computerszdmoz/Shoppingzdmoz/Sportsz	dmoz/Artsz
dmoz/Games<   c                 C   s   t | ||kS )a"  
    Check if two titles are similar based on a similarity threshold.

    Args:
        title1 (str): First title.
        title2 (str): Second title.
        threshold (int): Similarity threshold (default 85%).

    Returns:
        bool: True if titles are similar, False otherwise.
    )r   Ztoken_sort_ratio)Ztitle1Ztitle2	threshold r   >/home/ankuromar296_gmail_com/publish_blogs_ai/gsc/post_news.pyis_similar_title6   s    r   d   c                    s   ddddiddiddiddiddidd	idd
iddigid|i||dgiddgdddd}|dkr|d d }| dddigi ||d d< t| t|}g }	g }
|j| |dD ]>}|dd  rt fdd|
D s|	 | |
   q|	S )a  
    Fetch news articles based on location and date range, filtering out similar titles.

    Args:
        location_uri (str): The URI of the location (e.g., country).
        date_start (str): The start date for fetching articles (format: YYYY-MM-DD).
        date_end (str): The end date for fetching articles (format: YYYY-MM-DD).
        max_items (int): Maximum number of articles to fetch. Default is 100.

    Returns:
        list: A list of unique or sufficiently distinct articles fetched based on the query.
    z$andr   Z
conceptUriz!http://en.wikipedia.org/wiki/Soilz:http://en.wikipedia.org/wiki/Geographic_information_systemz(http://en.wikipedia.org/wiki/Agriculturez$http://en.wikipedia.org/wiki/Farmingz%http://en.wikipedia.org/wiki/Forestryz'http://en.wikipedia.org/wiki/Fertilizerz)http://en.wikipedia.org/wiki/Agribusinessz)http://en.wikipedia.org/wiki/TraceabilityZlocationUri)Z	dateStartZdateEndZblognewsZskipDuplicates)ZdataTypeZisDuplicaterel)$queryz$filterZarticlesSortByenr   langeng)maxItemstitleNc                 3   s   | ]}t  |V  qd S N)r   ).0Z
seen_titler   r   r   	<genexpr>   s    z$get_news_articles.<locals>.<genexpr>)appendprintZQueryArticlesIterZinitWithComplexQueryZ	execQuerygetany)erZ
date_startZdate_end	geographyr   	max_itemsqueryZ	query_andqZarticles_listZseen_titlesarticler   r   r   get_news_articlesE   s|           )1 

r(   c                 C   sH   t d| ddd} |  } tdd| } tdd| d} | S )	NNFKDasciiignorezutf-8z[^a-z0-9\s-] z\s+-)unicodedata	normalizeencodedecodelowerresubstripr   r   r   r   create_slug   s      r6   c                 C   s,   zt | }t| W n   | }Y nX |S r   )astliteral_evalr   )stringarrayr   r   r   string_to_array   s    

r;   c                 C   sF   d}d}t |d}|jjjddddd| dg||d	}|jd
 jjS )N3sk-VHC3Gjk2iuFCPtANMrliT3BlbkFJ7wxsFMqRp4KreMhwLiWzsk-proj-O44Tus5LHWDwreXOqQOMjJqqKIVMrIYHNBoJSitbCH4OLdT5bDUp3Ey9n7qtt1zTsbwrUtHX6gT3BlbkFJLbzL1SHbiJDfiSin8Kyf--R9BfRQp4WTCa7kxGxQlZB-ALIqFlror4MCBBAcT5mc6k4a0T3PkAapi_keyzgpt-4o-mini-2024-07-18systemz:You are a expert in SEO and a representative of Farmonaut.rolecontentuser)modelmessages
max_tokenstemperaturer   )r   chatcompletionscreatechoicesmessagerC   )promptrH   rG   r?   client
completionr   r   r   call_openai   s    
rQ   c                 C   s6   t jdd}|jjd||dd| dgd}|jd jS )	Nzl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>   zclaude-3-5-sonnet-20240620zYou are an SEO expert, a gis/ remote sensing expert, an agriculture and horticulture scientist, and a representative of Farmonaut (farmonaut.com).rD   rA   )rE   rG   rH   r@   rF   r   )	anthropic	AnthropicrF   rK   rC   text)rN   rH   rG   rO   rM   r   r   r   
call_genai   s    
rU   c                 C   s8   d}d}t |d}|jjd| dddd}|jd	 j}|S )
Nr<   r=   r>   zdall-e-3	1024x1024standard   )rE   rN   sizequalitynr   )r   imagesgeneratedataurl)rN   r?   rO   response	image_urlr   r   r   ai_image_url   s    
rb   c                 C   s2   t | }|j}tj|d }|r.|dd  S dS )NrX   r,   )r   pathossplitext)r_   
parsed_urlrc   file_extensionr   r   r   get_file_extension   s    rh   c                 C   s   t | d}t |d}t|jd }|j|j }t|| }|||ft j}|j|j df}t d|j	d}|
||| t ||}	t | }
|
jdkr|	|
j}	|	|  td|   d S )NRGBAg?r   )r   r   r   r   zWatermark added to )r   openconvertintheightwidthresizeLANCZOSnewrY   pastealpha_compositemodesaver   )main_image_pathwatermark_path
main_image	watermark
new_heightaspect_ratio	new_widthpositiontransparentoutputoriginal_imager   r   r   add_watermark	  s    


r   c              	   C   s   t j| r|  ds"tdt| T}t j| d }|j	dks\|j	dkrfd|j
krf|d}| d}||d	 W 5 Q R X t |  td
|  d| d d S )Nz.pngz*The provided file is not a valid PNG file.r   )ri   LAPtransparencyRGBz.jpgJPEGz
Converted z to z and removed the original PNG.)rd   rc   isfiler2   endswith
ValueErrorr   rj   re   rt   infork   ru   remover   )	file_pathimg	file_namejpg_pathr   r   r   convert_png_to_jpg0  s    



r   c              	   C   s   d}d}d}| d}t ||}t| \}}|||d}	| dtj|  }
t| d&}d|
||fi}tj||||	d	}W 5 Q R X |j	d
kr|
 d |
 d fS td|j	  td|j  dS d S )Nhttps://www.farmonaut.comankuromar296Tjat A2hz 9XMv pXJi YbV0 GR8oz/wp-json/wp/v2/media)alt_textcaptiondescription_rbfile)filesauthjson   id
source_urlz%Failed to upload media. Status code: z
Response: )NN)r   	mimetypes
guess_typerd   rc   basenamerj   requestspoststatus_coder   r   rT   )r   r   wp_urlwp_usernamewp_passwordendpointr   	mime_typer   
media_dataupload_namer   r   r`   r   r   r   upload_media_to_wordpressJ  s     


r   c              
   C   s   d}d}d}| d}t ||}z tj| d|d}|  W n8 tjjk
rx } ztdt| W 5 d }~X Y nX tj|| |d}|j	d	kr|
 S td
|j d S )Nr   r   r   z/wp-json/wp/v2/postsz/wp-json)r   z Failed to access WordPress API: )r   r   )   r   zFailed to publish/update post: )r   r   r    raise_for_status
exceptionsRequestException	Exceptionstrr   r   r   rT   )	post_datar   r   r   	posts_urlr   r`   er   r   r    publish_or_update_wordpress_postc  s    

$
r   c              
   C   s\  g }d}d}| D ]}|d }|d t | d t t| }ztj|dd}W n   tt  d }Y nX |d krvqtjtj	
|dd |jd	krt|d
&}|jddD ]}	|	r||	 qW 5 Q R X td|  t|d t| qtd|j  qt|}
|
D ]H}tj	||}t||\}}|r|r|d|||d nqq|S )Nr   Zinsta_files_newsrX   /.T)stream)exist_okr   wbi    )
chunk_sizezFile downloaded successfully: zwatermark.jpgz&Failed to download file. Status code: image)typerT   r   r_   )r   rh   r   r    r   	traceback
format_excrd   makedirsrc   dirnamer   rj   iter_contentwriter   r   listdirjoinr   r   )media_url_arrr   
media_info	media_numfolder_namer_   r   r`   r   chunkmedia_filesmedia_id	media_urlr   r   r   process_media2  sD    




r   c                 C   s   | j dd}dg d}|d ddd| j d| j dd	 t|d
dD ]V\}}d||dd | j d| j dd	|d |d   d	}|d | qP|S )Nr   BreadcrumbList)@typeitemListElementr   ListItemrX   Homez://)r   r}   nameitem   )startr-    )
rc   r5   splitr   schemenetloc	enumeratereplacer   r   )rf   path_segmentsbreadcrumbsr}   segmentbreadcrumb_itemr   r   r   create_breadcrumbs_fromURL  s(    

 (r   c                 C   sP   t |}dddddddgdf}tt|}t  d	 }d
d| ||||dS )NOrganization	Farmonautzhttps://farmonaut.comz"https://www.facebook.com/farmonautzhttps://twitter.com/farmonautz*https://www.linkedin.com/company/farmonautz#https://www.instagram.com/farmonaut)r   r   r_   sameAsZzhttps://schema.orgZNewsArticle)z@contextr   headliner   author
breadcrumbZdatePublished)listr   r   r	   utcnow	isoformat)r   r   blog_url
image_urlsorganizationr   Zcurrent_timestampr   r   r   create_news_schema  s*    r   c                 C   s4   t t|d }d| d| d}t|ddd}|S )N   zRewrite this text in roughly z tokens : 
    Text: z<
    Note: Do not add any additional information whatsoever.rX   @  rH   rG   )rl   lenrQ   )r   rC   total_tokensrN   summaryr   r   r   get_news_summary  s    r   c                 C   s,   d|  d| d}t |ddd}t|}|S )NzFrom the content and summary provided, extract 5-10 SEO-optimized keywords that are highly relevant to the news topic:  
    Title: z, Content: z|
    Output format:
    - Return only the refined array of keywords
    - Do not include any explanatory text or commentary rX     r   rU   r;   )r   rC   rN   keywordsr   r   r   get_keywords  s    r   c                 C   s$   d|  d| d}t |ddd}|S )NzUsing the following keywords and summary, create a catchy SEO-optimized title for the news article. 
    Make it 150 characters or less and include at least one power word. Keywords: z. Summary: z
    4. Output format:
        - Return only the suggested title
        - Do not include any explanatory text, quotation marks, or additional commentaryrX   r   r   rU   )r   r   rN   r   r   r   r   generate_title  s    r   c                 C   s2   d|  d| d| d}t |ddd}t|}|S )NzUsing the following title, keywords, and summary, generate a set of 5-7 SEO-friendly keyphrases. 
    Ensure the keyphrases are relevant and likely to rank well in search engines. 
    Title: , Keywords: , Summary: ai  
    3. Output format:
    - Return results as an array of strings
    - Each keyphrase should be its own element in the array
    - Do not include any explanatory text or commentary
    - If fewer than 10 suitable keyphrases are found, include only those that meet the criteria

    Example output format:
    ["keyphrase 1", "keyphrase 2", "keyphrase 3", ...]rX   r   r   r   )r   r   r   rN   
keyphrasesr   r   r   get_keyphrases+  s    r  c                 C   s(   d|  d| d| d}t |dd}|S )Nz_Task: Generate an SEO-optimized news blog's meta description:

    Inputs:
        - Keywords: z
        - Key phrases: z
        - Reference Text: z

    Output format:
        - Provide only the 150-word summary
        - DO NOT OUTPUT ANY OTHER TEXT WITH THE RESPONSE
        r   r   r   )r   r  r   rN   r   r   r   generate_summary>  s    r  c              	   C   s   d|  d| d| d| d	}t |dd}td t|}g }|D ]:}z|t| td	| W qB   tt  Y qBX qB|S )
Nz
        Task: Suggest two SEO-optimizing AI image descriptions (DALL-E 3) for News Article.

        Inputs:
        a. Keywords: z
        b. Title: z
        c. Context: z
        d. KeyPhrases: a  
        
    Output format:
                        [
                        "Detailed description of first image, incorporating relevant keywords and focusing on a key aspect of the blog topic or Farmonaut's services.",
                        "Detailed description of second image, highlighting a different facet of the blog content or Farmonaut's technology, using appropriate keywords."
                        ]

                        Note: Focus on creating descriptions that would result in images that add significant value to the blog post while optimizing for search engines. Ensure descriptions are distinct from each other and highly relevant to the content. DO NOT OUTPUT ANY OTHER TEXT WITH THE RESPONSE.
        rX   r   z
Image Descriptions:z	media url)rU   r   r;   r   rb   r   r   )r   r   r   r  rN   image_descriptionsr   image_descriptionr   r   r   generate_imagesP  s(    r  c                 C   sB   d|  d| d| d| d| d| d| d}t |d	d
d}|S )NzUsing the title, keywords, keyphrases, and summary, write a news article of upto 1500 words. 
    Make sure the article is SEO-optimized, informative, engaging, and integrates the keyphrases naturally. 
    Title: r   z, Keyphrases: r  z

    Key requirements:
    - Use HTML formatting including <strong>, <h1>, <h2>, <i>, <u>, <ol>, <ul>, <br>, and <p> tags where appropriate
    - Include and naturally incorporate as many of these keywords: z"
    - Include these key phrases: z
    - Write in the language of keywords/keywords if keywords/phrases are not in English
    - Mandatorily Localize the content if location names are available in keywords.
    - Add all the images from this JSON object: a   with border-radius:16px, box-shadow: 10px 10px 15px, cursor: pointer. These images should open https://farmonaut.com/app_redirect when clicked. All images should be placed within 75% content of the blog.
    - Add links:
    - App Button Image: https://farmonaut.com/Images/web_app_button.png, Link on this button: https://farmonaut.com/app_redirect, height: 80px
    - API: https://sat.farmonaut.com/api
    - API Developer Docs:https://farmonaut.com/farmonaut-satellite-weather-api-developer-docs/
    - Android App Button Image: https://farmonaut.com/wp-content/uploads/2020/01/get_it_on_google_play.png, Link on this Button: https://play.google.com/store/apps/details?id=com.farmonaut.android, height: 80px
    - iOS App Button Image: https://farmonaut.com/wp-content/uploads/2020/01/available_on_app_store.png, Link on this Button: https://apps.apple.com/in/app/farmonaut/id1489095847, height: 80px
    - Distribute these links within top 75% content of the blog with bold font
    - Use bullet points and subheadings (font color: #034d5c) to improve readability
    - Make the content mobile responsive

    Additional guidelines:
    - Achieve Flesch Reading Ease Score of at least 60
    - Provide detailed explanations and examples
    - Ensure all content is factual and based on provided information
    - Organize information logically with clear transitions between sections
    - Use varied sentence structures and vocabulary for engaging reading
    - Mandatorily implement all the latest SEO guidelines provided by Google News

    Please generate the news article  based on these requirements, ensuring it's well-structured, and upto 1500 words long.

    rX   r   r   r   )r   r   r  r   mediarN   news_contentr   r   r   generate_news_articleu  s"    
!r
  c                 C   sL  |  dd}|  dd}|}|}t||}t||}td| td t|||}t|||}t||||}	t|	|}
td |g d}|
r|d |
 |
r|
d	 d
 nd |d< |d }t	|||||}td dt
| }dd |
D }t|||}dt| d}||d< || |d< d|d< ||d< d|d< dg|d< t| |S )Nr   zNo title availablebodyzNo content availablezGetting keyphraseszMedia Processed)r   r  r  r   r   featured_mediaznews content processed 
zhttps://farmonaut.com/news/c                 S   s    g | ]}|d  dkr|d qS )r   r   r_   r   )r   r   r   r   r   
<listcomp>  s      z5generate_and_publish_news_article.<locals>.<listcomp>z#<script type="application/ld+json">z	</script>rC   publishstatusexcerptrj   comment_statusi=  
categories)r    r   r   r   r  r  r  r   extendr
  r6   r   r   dumpsr   )r'   Ztitle_articleZcontent_articleZnews_summaryr   r   r   r  r  r   r   r   r  r	  r   r   Zstructured_schemaZstructured_schema_scriptr   r   r   !generate_and_publish_news_article  sB    





r  c                 C   sj   t | }|dkrtd dS d| }t| ddD ]4}zt| t| W q0   tt  Y q0X q0dS )z
    Publish news articles spaced equally over 24 hours.

    Args:
        articles (list): List of articles to publish.
        generate_and_publish_news_article (func): Function to generate and publish an article.
    r   zNo articles to publish.NiQ zProcessing News articles)desc)r   r   r   r  timesleepr   r   )articlesZtotal_articlesintervalr'   r   r   r   publish_news_spaced  s    r  )r   )r   ).openair   rR   r   urllib.parser   requests.authr   r   r7   PILr   r3   r.   eventregistryr   
fuzzywuzzyr   r	   r
   r  r  r   r(   r6   r;   rQ   rU   rb   rh   r   r   r   r   r   r   r   r   r   r   r  r  r  r
  r  r  r   r   r   r   <module>   s              %

`
'%3!%%'<